author    Jesús <heckyel@hyperbola.info>  2021-06-09 17:54:27 -0500
committer Jesús <heckyel@hyperbola.info>  2021-06-09 17:54:27 -0500
commit    27fe903c511691c078942bef5ee9a05a43b15c8f (patch)
tree      50f30ab2ec749b965869518c0a28651f8677f0d3
download  hypervideo-27fe903c511691c078942bef5ee9a05a43b15c8f.tar.lz
          hypervideo-27fe903c511691c078942bef5ee9a05a43b15c8f.tar.xz
          hypervideo-27fe903c511691c078942bef5ee9a05a43b15c8f.zip
initial
-rw-r--r--.build.yml31
-rw-r--r--.drone.yml20
-rw-r--r--.flake83
-rw-r--r--.gitignore101
-rw-r--r--.gitlab-ci.yml33
-rw-r--r--AUTHORS249
-rw-r--r--CONTRIBUTING.md435
-rw-r--r--ChangeLog6142
-rw-r--r--LICENSE121
-rw-r--r--MANIFEST.in9
-rw-r--r--Makefile123
-rw-r--r--README.md1447
-rwxr-xr-xbin/hypervideo6
-rw-r--r--devscripts/SizeOfImage.patchbin0 -> 147 bytes
-rw-r--r--devscripts/SizeOfImage_w.patchbin0 -> 148 bytes
-rw-r--r--devscripts/bash-completion.in29
-rwxr-xr-xdevscripts/bash-completion.py30
-rw-r--r--devscripts/buildserver.py433
-rw-r--r--devscripts/check-porn.py60
-rw-r--r--devscripts/fish-completion.in5
-rwxr-xr-xdevscripts/fish-completion.py49
-rw-r--r--devscripts/generate_aes_testdata.py43
-rw-r--r--devscripts/lazy_load_template.py19
-rwxr-xr-xdevscripts/make_contributing.py33
-rw-r--r--devscripts/make_lazy_extractors.py100
-rwxr-xr-xdevscripts/make_readme.py26
-rw-r--r--devscripts/make_supportedsites.py46
-rwxr-xr-xdevscripts/posix-locale.sh6
-rw-r--r--devscripts/prepare_manpage.py79
-rw-r--r--devscripts/run_tests.bat17
-rwxr-xr-xdevscripts/run_tests.sh22
-rw-r--r--devscripts/zsh-completion.in28
-rwxr-xr-xdevscripts/zsh-completion.py49
-rw-r--r--docs/.gitignore1
-rw-r--r--docs/Makefile177
-rw-r--r--docs/conf.py71
-rw-r--r--docs/index.rst23
-rw-r--r--docs/module_guide.rst67
-rw-r--r--docs/supportedsites.md1228
-rw-r--r--hypervideo.plugin.zsh24
-rwxr-xr-xhypervideo_dl/YoutubeDL.py2469
-rw-r--r--hypervideo_dl/__init__.py478
-rwxr-xr-xhypervideo_dl/__main__.py19
-rw-r--r--hypervideo_dl/aes.py361
-rw-r--r--hypervideo_dl/cache.py96
-rw-r--r--hypervideo_dl/compat.py3060
-rw-r--r--hypervideo_dl/downloader/__init__.py61
-rw-r--r--hypervideo_dl/downloader/common.py391
-rw-r--r--hypervideo_dl/downloader/dash.py80
-rw-r--r--hypervideo_dl/downloader/external.py371
-rw-r--r--hypervideo_dl/downloader/f4m.py438
-rw-r--r--hypervideo_dl/downloader/fragment.py279
-rw-r--r--hypervideo_dl/downloader/hls.py216
-rw-r--r--hypervideo_dl/downloader/http.py364
-rw-r--r--hypervideo_dl/downloader/ism.py259
-rw-r--r--hypervideo_dl/downloader/rtmp.py214
-rw-r--r--hypervideo_dl/downloader/rtsp.py47
-rw-r--r--hypervideo_dl/extractor/__init__.py46
-rw-r--r--hypervideo_dl/extractor/abc.py193
-rw-r--r--hypervideo_dl/extractor/abcnews.py158
-rw-r--r--hypervideo_dl/extractor/abcotvs.py137
-rw-r--r--hypervideo_dl/extractor/academicearth.py41
-rw-r--r--hypervideo_dl/extractor/acast.py126
-rw-r--r--hypervideo_dl/extractor/adn.py269
-rw-r--r--hypervideo_dl/extractor/adobeconnect.py37
-rw-r--r--hypervideo_dl/extractor/adobepass.py1572
-rw-r--r--hypervideo_dl/extractor/adobetv.py288
-rw-r--r--hypervideo_dl/extractor/adultswim.py202
-rw-r--r--hypervideo_dl/extractor/aenetworks.py342
-rw-r--r--hypervideo_dl/extractor/afreecatv.py367
-rw-r--r--hypervideo_dl/extractor/airmozilla.py66
-rw-r--r--hypervideo_dl/extractor/aliexpress.py53
-rw-r--r--hypervideo_dl/extractor/aljazeera.py56
-rw-r--r--hypervideo_dl/extractor/allocine.py132
-rw-r--r--hypervideo_dl/extractor/alphaporno.py77
-rw-r--r--hypervideo_dl/extractor/amara.py103
-rw-r--r--hypervideo_dl/extractor/amcnetworks.py119
-rw-r--r--hypervideo_dl/extractor/americastestkitchen.py159
-rw-r--r--hypervideo_dl/extractor/amp.py103
-rw-r--r--hypervideo_dl/extractor/animeondemand.py299
-rw-r--r--hypervideo_dl/extractor/anvato.py381
-rw-r--r--hypervideo_dl/extractor/aol.py139
-rw-r--r--hypervideo_dl/extractor/apa.py95
-rw-r--r--hypervideo_dl/extractor/aparat.py89
-rw-r--r--hypervideo_dl/extractor/appleconnect.py50
-rw-r--r--hypervideo_dl/extractor/applepodcasts.py62
-rw-r--r--hypervideo_dl/extractor/appletrailers.py283
-rw-r--r--hypervideo_dl/extractor/archiveorg.py95
-rw-r--r--hypervideo_dl/extractor/arcpublishing.py174
-rw-r--r--hypervideo_dl/extractor/ard.py452
-rw-r--r--hypervideo_dl/extractor/arkena.py163
-rw-r--r--hypervideo_dl/extractor/arnes.py101
-rw-r--r--hypervideo_dl/extractor/arte.py254
-rw-r--r--hypervideo_dl/extractor/asiancrush.py200
-rw-r--r--hypervideo_dl/extractor/atresplayer.py118
-rw-r--r--hypervideo_dl/extractor/atttechchannel.py55
-rw-r--r--hypervideo_dl/extractor/atvat.py75
-rw-r--r--hypervideo_dl/extractor/audimedia.py93
-rw-r--r--hypervideo_dl/extractor/audioboom.py73
-rw-r--r--hypervideo_dl/extractor/audiomack.py145
-rw-r--r--hypervideo_dl/extractor/awaan.py187
-rw-r--r--hypervideo_dl/extractor/aws.py78
-rw-r--r--hypervideo_dl/extractor/azmedien.py66
-rw-r--r--hypervideo_dl/extractor/baidu.py56
-rw-r--r--hypervideo_dl/extractor/bandaichannel.py37
-rw-r--r--hypervideo_dl/extractor/bandcamp.py391
-rw-r--r--hypervideo_dl/extractor/bbc.py1623
-rw-r--r--hypervideo_dl/extractor/beatport.py103
-rw-r--r--hypervideo_dl/extractor/beeg.py116
-rw-r--r--hypervideo_dl/extractor/behindkink.py46
-rw-r--r--hypervideo_dl/extractor/bellmedia.py88
-rw-r--r--hypervideo_dl/extractor/bet.py80
-rw-r--r--hypervideo_dl/extractor/bfi.py37
-rw-r--r--hypervideo_dl/extractor/bfmtv.py103
-rw-r--r--hypervideo_dl/extractor/bibeltv.py30
-rw-r--r--hypervideo_dl/extractor/bigflix.py78
-rw-r--r--hypervideo_dl/extractor/bild.py40
-rw-r--r--hypervideo_dl/extractor/bilibili.py451
-rw-r--r--hypervideo_dl/extractor/biobiochiletv.py86
-rw-r--r--hypervideo_dl/extractor/biqle.py105
-rw-r--r--hypervideo_dl/extractor/bitchute.py142
-rw-r--r--hypervideo_dl/extractor/bleacherreport.py112
-rw-r--r--hypervideo_dl/extractor/bloomberg.py83
-rw-r--r--hypervideo_dl/extractor/bokecc.py60
-rw-r--r--hypervideo_dl/extractor/bongacams.py60
-rw-r--r--hypervideo_dl/extractor/bostonglobe.py72
-rw-r--r--hypervideo_dl/extractor/box.py98
-rw-r--r--hypervideo_dl/extractor/bpb.py62
-rw-r--r--hypervideo_dl/extractor/br.py311
-rw-r--r--hypervideo_dl/extractor/bravotv.py90
-rw-r--r--hypervideo_dl/extractor/breakcom.py91
-rw-r--r--hypervideo_dl/extractor/brightcove.py681
-rw-r--r--hypervideo_dl/extractor/businessinsider.py48
-rw-r--r--hypervideo_dl/extractor/buzzfeed.py98
-rw-r--r--hypervideo_dl/extractor/byutv.py117
-rw-r--r--hypervideo_dl/extractor/c56.py65
-rw-r--r--hypervideo_dl/extractor/camdemy.py161
-rw-r--r--hypervideo_dl/extractor/cammodels.py98
-rw-r--r--hypervideo_dl/extractor/camtube.py71
-rw-r--r--hypervideo_dl/extractor/camwithher.py89
-rw-r--r--hypervideo_dl/extractor/canalc2.py73
-rw-r--r--hypervideo_dl/extractor/canalplus.py116
-rw-r--r--hypervideo_dl/extractor/canvas.py384
-rw-r--r--hypervideo_dl/extractor/carambatv.py108
-rw-r--r--hypervideo_dl/extractor/cartoonnetwork.py62
-rw-r--r--hypervideo_dl/extractor/cbc.py497
-rw-r--r--hypervideo_dl/extractor/cbs.py115
-rw-r--r--hypervideo_dl/extractor/cbsinteractive.py103
-rw-r--r--hypervideo_dl/extractor/cbslocal.py119
-rw-r--r--hypervideo_dl/extractor/cbsnews.py147
-rw-r--r--hypervideo_dl/extractor/cbssports.py113
-rw-r--r--hypervideo_dl/extractor/ccc.py111
-rw-r--r--hypervideo_dl/extractor/ccma.py155
-rw-r--r--hypervideo_dl/extractor/cctv.py191
-rw-r--r--hypervideo_dl/extractor/cda.py214
-rw-r--r--hypervideo_dl/extractor/ceskatelevize.py289
-rw-r--r--hypervideo_dl/extractor/channel9.py262
-rw-r--r--hypervideo_dl/extractor/charlierose.py54
-rw-r--r--hypervideo_dl/extractor/chaturbate.py109
-rw-r--r--hypervideo_dl/extractor/chilloutzone.py96
-rw-r--r--hypervideo_dl/extractor/chirbit.py91
-rw-r--r--hypervideo_dl/extractor/cinchcast.py58
-rw-r--r--hypervideo_dl/extractor/cinemax.py29
-rw-r--r--hypervideo_dl/extractor/ciscolive.py151
-rw-r--r--hypervideo_dl/extractor/cjsw.py72
-rw-r--r--hypervideo_dl/extractor/cliphunter.py79
-rw-r--r--hypervideo_dl/extractor/clippit.py74
-rw-r--r--hypervideo_dl/extractor/cliprs.py33
-rw-r--r--hypervideo_dl/extractor/clipsyndicate.py54
-rw-r--r--hypervideo_dl/extractor/closertotruth.py92
-rw-r--r--hypervideo_dl/extractor/cloudflarestream.py72
-rw-r--r--hypervideo_dl/extractor/cloudy.py60
-rw-r--r--hypervideo_dl/extractor/clubic.py56
-rw-r--r--hypervideo_dl/extractor/clyp.py82
-rw-r--r--hypervideo_dl/extractor/cmt.py54
-rw-r--r--hypervideo_dl/extractor/cnbc.py71
-rw-r--r--hypervideo_dl/extractor/cnn.py147
-rw-r--r--hypervideo_dl/extractor/comedycentral.py51
-rw-r--r--hypervideo_dl/extractor/common.py3064
-rw-r--r--hypervideo_dl/extractor/commonmistakes.py50
-rw-r--r--hypervideo_dl/extractor/commonprotocols.py60
-rw-r--r--hypervideo_dl/extractor/condenast.py251
-rw-r--r--hypervideo_dl/extractor/contv.py118
-rw-r--r--hypervideo_dl/extractor/corus.py160
-rw-r--r--hypervideo_dl/extractor/coub.py140
-rw-r--r--hypervideo_dl/extractor/cracked.py90
-rw-r--r--hypervideo_dl/extractor/crackle.py200
-rw-r--r--hypervideo_dl/extractor/crooksandliars.py60
-rw-r--r--hypervideo_dl/extractor/crunchyroll.py686
-rw-r--r--hypervideo_dl/extractor/cspan.py244
-rw-r--r--hypervideo_dl/extractor/ctsnews.py87
-rw-r--r--hypervideo_dl/extractor/ctv.py52
-rw-r--r--hypervideo_dl/extractor/ctvnews.py68
-rw-r--r--hypervideo_dl/extractor/cultureunplugged.py70
-rw-r--r--hypervideo_dl/extractor/curiositystream.py174
-rw-r--r--hypervideo_dl/extractor/cwtv.py97
-rw-r--r--hypervideo_dl/extractor/dailymail.py84
-rw-r--r--hypervideo_dl/extractor/dailymotion.py393
-rw-r--r--hypervideo_dl/extractor/daum.py266
-rw-r--r--hypervideo_dl/extractor/dbtv.py57
-rw-r--r--hypervideo_dl/extractor/dctp.py105
-rw-r--r--hypervideo_dl/extractor/deezer.py91
-rw-r--r--hypervideo_dl/extractor/defense.py39
-rw-r--r--hypervideo_dl/extractor/democracynow.py96
-rw-r--r--hypervideo_dl/extractor/dfb.py57
-rw-r--r--hypervideo_dl/extractor/dhm.py59
-rw-r--r--hypervideo_dl/extractor/digg.py56
-rw-r--r--hypervideo_dl/extractor/digiteka.py112
-rw-r--r--hypervideo_dl/extractor/discovery.py118
-rw-r--r--hypervideo_dl/extractor/discoverygo.py175
-rw-r--r--hypervideo_dl/extractor/discoverynetworks.py43
-rw-r--r--hypervideo_dl/extractor/discoveryvr.py59
-rw-r--r--hypervideo_dl/extractor/disney.py170
-rw-r--r--hypervideo_dl/extractor/dispeak.py131
-rw-r--r--hypervideo_dl/extractor/dlive.py97
-rw-r--r--hypervideo_dl/extractor/dotsub.py83
-rw-r--r--hypervideo_dl/extractor/douyutv.py201
-rw-r--r--hypervideo_dl/extractor/dplay.py369
-rw-r--r--hypervideo_dl/extractor/drbonanza.py59
-rw-r--r--hypervideo_dl/extractor/dreisat.py43
-rw-r--r--hypervideo_dl/extractor/dropbox.py40
-rw-r--r--hypervideo_dl/extractor/drtuber.py112
-rw-r--r--hypervideo_dl/extractor/drtv.py355
-rw-r--r--hypervideo_dl/extractor/dtube.py83
-rw-r--r--hypervideo_dl/extractor/dumpert.py80
-rw-r--r--hypervideo_dl/extractor/dvtv.py184
-rw-r--r--hypervideo_dl/extractor/dw.py108
-rw-r--r--hypervideo_dl/extractor/eagleplatform.py206
-rw-r--r--hypervideo_dl/extractor/ebaumsworld.py33
-rw-r--r--hypervideo_dl/extractor/echomsk.py46
-rw-r--r--hypervideo_dl/extractor/egghead.py134
-rw-r--r--hypervideo_dl/extractor/ehow.py38
-rw-r--r--hypervideo_dl/extractor/eighttracks.py164
-rw-r--r--hypervideo_dl/extractor/einthusan.py111
-rw-r--r--hypervideo_dl/extractor/eitb.py88
-rw-r--r--hypervideo_dl/extractor/ellentube.py133
-rw-r--r--hypervideo_dl/extractor/elpais.py95
-rw-r--r--hypervideo_dl/extractor/embedly.py16
-rw-r--r--hypervideo_dl/extractor/engadget.py27
-rw-r--r--hypervideo_dl/extractor/eporner.py132
-rw-r--r--hypervideo_dl/extractor/eroprofile.py92
-rw-r--r--hypervideo_dl/extractor/escapist.py111
-rw-r--r--hypervideo_dl/extractor/espn.py238
-rw-r--r--hypervideo_dl/extractor/esri.py74
-rw-r--r--hypervideo_dl/extractor/europa.py93
-rw-r--r--hypervideo_dl/extractor/expotv.py77
-rw-r--r--hypervideo_dl/extractor/expressen.py101
-rw-r--r--hypervideo_dl/extractor/extractors.py1648
-rw-r--r--hypervideo_dl/extractor/extremetube.py50
-rw-r--r--hypervideo_dl/extractor/eyedotv.py64
-rw-r--r--hypervideo_dl/extractor/facebook.py709
-rw-r--r--hypervideo_dl/extractor/faz.py93
-rw-r--r--hypervideo_dl/extractor/fc2.py160
-rw-r--r--hypervideo_dl/extractor/fczenit.py56
-rw-r--r--hypervideo_dl/extractor/filmon.py178
-rw-r--r--hypervideo_dl/extractor/filmweb.py42
-rw-r--r--hypervideo_dl/extractor/firsttv.py156
-rw-r--r--hypervideo_dl/extractor/fivemin.py54
-rw-r--r--hypervideo_dl/extractor/fivetv.py91
-rw-r--r--hypervideo_dl/extractor/flickr.py116
-rw-r--r--hypervideo_dl/extractor/folketinget.py77
-rw-r--r--hypervideo_dl/extractor/footyroom.py56
-rw-r--r--hypervideo_dl/extractor/formula1.py27
-rw-r--r--hypervideo_dl/extractor/fourtube.py309
-rw-r--r--hypervideo_dl/extractor/fox.py150
-rw-r--r--hypervideo_dl/extractor/fox9.py41
-rw-r--r--hypervideo_dl/extractor/foxgay.py63
-rw-r--r--hypervideo_dl/extractor/foxnews.py127
-rw-r--r--hypervideo_dl/extractor/foxsports.py33
-rw-r--r--hypervideo_dl/extractor/franceculture.py73
-rw-r--r--hypervideo_dl/extractor/franceinter.py59
-rw-r--r--hypervideo_dl/extractor/francetv.py546
-rw-r--r--hypervideo_dl/extractor/freesound.py79
-rw-r--r--hypervideo_dl/extractor/freespeech.py31
-rw-r--r--hypervideo_dl/extractor/freshlive.py83
-rw-r--r--hypervideo_dl/extractor/frontendmasters.py263
-rw-r--r--hypervideo_dl/extractor/fujitv.py35
-rw-r--r--hypervideo_dl/extractor/funimation.py158
-rw-r--r--hypervideo_dl/extractor/funk.py49
-rw-r--r--hypervideo_dl/extractor/fusion.py84
-rw-r--r--hypervideo_dl/extractor/gaia.py130
-rw-r--r--hypervideo_dl/extractor/gameinformer.py49
-rw-r--r--hypervideo_dl/extractor/gamespot.py79
-rw-r--r--hypervideo_dl/extractor/gamestar.py65
-rw-r--r--hypervideo_dl/extractor/gaskrank.py101
-rw-r--r--hypervideo_dl/extractor/gazeta.py48
-rw-r--r--hypervideo_dl/extractor/gdcvault.py220
-rw-r--r--hypervideo_dl/extractor/gedidigital.py161
-rw-r--r--hypervideo_dl/extractor/generic.py3597
-rw-r--r--hypervideo_dl/extractor/gfycat.py125
-rw-r--r--hypervideo_dl/extractor/giantbomb.py90
-rw-r--r--hypervideo_dl/extractor/giga.py102
-rw-r--r--hypervideo_dl/extractor/gigya.py22
-rw-r--r--hypervideo_dl/extractor/glide.py43
-rw-r--r--hypervideo_dl/extractor/globo.py240
-rw-r--r--hypervideo_dl/extractor/go.py315
-rw-r--r--hypervideo_dl/extractor/godtube.py58
-rw-r--r--hypervideo_dl/extractor/golem.py72
-rw-r--r--hypervideo_dl/extractor/googledrive.py278
-rw-r--r--hypervideo_dl/extractor/googlepodcasts.py88
-rw-r--r--hypervideo_dl/extractor/googlesearch.py59
-rw-r--r--hypervideo_dl/extractor/goshgay.py51
-rw-r--r--hypervideo_dl/extractor/gputechconf.py35
-rw-r--r--hypervideo_dl/extractor/groupon.py67
-rw-r--r--hypervideo_dl/extractor/hbo.py175
-rw-r--r--hypervideo_dl/extractor/hearthisat.py135
-rw-r--r--hypervideo_dl/extractor/heise.py172
-rw-r--r--hypervideo_dl/extractor/hellporno.py76
-rw-r--r--hypervideo_dl/extractor/helsinki.py43
-rw-r--r--hypervideo_dl/extractor/hentaistigma.py39
-rw-r--r--hypervideo_dl/extractor/hgtv.py40
-rw-r--r--hypervideo_dl/extractor/hidive.py118
-rw-r--r--hypervideo_dl/extractor/historicfilms.py47
-rw-r--r--hypervideo_dl/extractor/hitbox.py214
-rw-r--r--hypervideo_dl/extractor/hitrecord.py68
-rw-r--r--hypervideo_dl/extractor/hketv.py191
-rw-r--r--hypervideo_dl/extractor/hornbunny.py49
-rw-r--r--hypervideo_dl/extractor/hotnewhiphop.py66
-rw-r--r--hypervideo_dl/extractor/hotstar.py252
-rw-r--r--hypervideo_dl/extractor/howcast.py43
-rw-r--r--hypervideo_dl/extractor/howstuffworks.py90
-rw-r--r--hypervideo_dl/extractor/hrti.py208
-rw-r--r--hypervideo_dl/extractor/huajiao.py56
-rw-r--r--hypervideo_dl/extractor/huffpost.py96
-rw-r--r--hypervideo_dl/extractor/hungama.py117
-rw-r--r--hypervideo_dl/extractor/hypem.py49
-rw-r--r--hypervideo_dl/extractor/ign.py257
-rw-r--r--hypervideo_dl/extractor/iheart.py97
-rw-r--r--hypervideo_dl/extractor/imdb.py147
-rw-r--r--hypervideo_dl/extractor/imggaming.py133
-rw-r--r--hypervideo_dl/extractor/imgur.py154
-rw-r--r--hypervideo_dl/extractor/ina.py86
-rw-r--r--hypervideo_dl/extractor/inc.py59
-rw-r--r--hypervideo_dl/extractor/indavideo.py128
-rw-r--r--hypervideo_dl/extractor/infoq.py137
-rw-r--r--hypervideo_dl/extractor/instagram.py474
-rw-r--r--hypervideo_dl/extractor/internazionale.py85
-rw-r--r--hypervideo_dl/extractor/internetvideoarchive.py64
-rw-r--r--hypervideo_dl/extractor/iprima.py149
-rw-r--r--hypervideo_dl/extractor/iqiyi.py219
-rw-r--r--hypervideo_dl/extractor/ir90tv.py42
-rw-r--r--hypervideo_dl/extractor/itv.py185
-rw-r--r--hypervideo_dl/extractor/ivi.py271
-rw-r--r--hypervideo_dl/extractor/ivideon.py83
-rw-r--r--hypervideo_dl/extractor/iwara.py99
-rw-r--r--hypervideo_dl/extractor/izlesene.py117
-rw-r--r--hypervideo_dl/extractor/jamendo.py195
-rw-r--r--hypervideo_dl/extractor/jeuxvideo.py56
-rw-r--r--hypervideo_dl/extractor/joj.py108
-rw-r--r--hypervideo_dl/extractor/jove.py80
-rw-r--r--hypervideo_dl/extractor/jwplatform.py46
-rw-r--r--hypervideo_dl/extractor/kakao.py143
-rw-r--r--hypervideo_dl/extractor/kaltura.py377
-rw-r--r--hypervideo_dl/extractor/kankan.py48
-rw-r--r--hypervideo_dl/extractor/karaoketv.py64
-rw-r--r--hypervideo_dl/extractor/karrierevideos.py99
-rw-r--r--hypervideo_dl/extractor/keezmovies.py133
-rw-r--r--hypervideo_dl/extractor/ketnet.py72
-rw-r--r--hypervideo_dl/extractor/khanacademy.py107
-rw-r--r--hypervideo_dl/extractor/kickstarter.py71
-rw-r--r--hypervideo_dl/extractor/kinja.py221
-rw-r--r--hypervideo_dl/extractor/kinopoisk.py70
-rw-r--r--hypervideo_dl/extractor/konserthusetplay.py124
-rw-r--r--hypervideo_dl/extractor/krasview.py60
-rw-r--r--hypervideo_dl/extractor/ku6.py32
-rw-r--r--hypervideo_dl/extractor/kusi.py88
-rw-r--r--hypervideo_dl/extractor/kuwo.py352
-rw-r--r--hypervideo_dl/extractor/la7.py67
-rw-r--r--hypervideo_dl/extractor/laola1tv.py265
-rw-r--r--hypervideo_dl/extractor/lbry.py280
-rw-r--r--hypervideo_dl/extractor/lci.py26
-rw-r--r--hypervideo_dl/extractor/lcp.py90
-rw-r--r--hypervideo_dl/extractor/lecture2go.py71
-rw-r--r--hypervideo_dl/extractor/lecturio.py243
-rw-r--r--hypervideo_dl/extractor/leeco.py368
-rw-r--r--hypervideo_dl/extractor/lego.py149
-rw-r--r--hypervideo_dl/extractor/lemonde.py58
-rw-r--r--hypervideo_dl/extractor/lenta.py53
-rw-r--r--hypervideo_dl/extractor/libraryofcongress.py153
-rw-r--r--hypervideo_dl/extractor/libsyn.py93
-rw-r--r--hypervideo_dl/extractor/lifenews.py239
-rw-r--r--hypervideo_dl/extractor/limelight.py358
-rw-r--r--hypervideo_dl/extractor/line.py230
-rw-r--r--hypervideo_dl/extractor/linkedin.py182
-rw-r--r--hypervideo_dl/extractor/linuxacademy.py243
-rw-r--r--hypervideo_dl/extractor/litv.py148
-rw-r--r--hypervideo_dl/extractor/livejournal.py42
-rw-r--r--hypervideo_dl/extractor/liveleak.py191
-rw-r--r--hypervideo_dl/extractor/livestream.py366
-rw-r--r--hypervideo_dl/extractor/lnkgo.py88
-rw-r--r--hypervideo_dl/extractor/localnews8.py47
-rw-r--r--hypervideo_dl/extractor/lovehomeporn.py37
-rw-r--r--hypervideo_dl/extractor/lrt.py75
-rw-r--r--hypervideo_dl/extractor/lynda.py341
-rw-r--r--hypervideo_dl/extractor/m6.py25
-rw-r--r--hypervideo_dl/extractor/mailru.py329
-rw-r--r--hypervideo_dl/extractor/malltv.py88
-rw-r--r--hypervideo_dl/extractor/mangomolo.py58
-rw-r--r--hypervideo_dl/extractor/manyvids.py92
-rw-r--r--hypervideo_dl/extractor/maoritv.py31
-rw-r--r--hypervideo_dl/extractor/markiza.py125
-rw-r--r--hypervideo_dl/extractor/massengeschmacktv.py77
-rw-r--r--hypervideo_dl/extractor/matchtv.py55
-rw-r--r--hypervideo_dl/extractor/mdr.py195
-rw-r--r--hypervideo_dl/extractor/medaltv.py137
-rw-r--r--hypervideo_dl/extractor/medialaan.py114
-rw-r--r--hypervideo_dl/extractor/mediaset.py182
-rw-r--r--hypervideo_dl/extractor/mediasite.py366
-rw-r--r--hypervideo_dl/extractor/medici.py70
-rw-r--r--hypervideo_dl/extractor/megaphone.py55
-rw-r--r--hypervideo_dl/extractor/meipai.py104
-rw-r--r--hypervideo_dl/extractor/melonvod.py72
-rw-r--r--hypervideo_dl/extractor/meta.py73
-rw-r--r--hypervideo_dl/extractor/metacafe.py287
-rw-r--r--hypervideo_dl/extractor/metacritic.py65
-rw-r--r--hypervideo_dl/extractor/mgoon.py87
-rw-r--r--hypervideo_dl/extractor/mgtv.py100
-rw-r--r--hypervideo_dl/extractor/miaopai.py40
-rw-r--r--hypervideo_dl/extractor/microsoftvirtualacademy.py195
-rw-r--r--hypervideo_dl/extractor/minds.py196
-rw-r--r--hypervideo_dl/extractor/ministrygrid.py57
-rw-r--r--hypervideo_dl/extractor/minoto.py51
-rw-r--r--hypervideo_dl/extractor/miomio.py141
-rw-r--r--hypervideo_dl/extractor/mit.py132
-rw-r--r--hypervideo_dl/extractor/mitele.py85
-rw-r--r--hypervideo_dl/extractor/mixcloud.py356
-rw-r--r--hypervideo_dl/extractor/mlb.py267
-rw-r--r--hypervideo_dl/extractor/mnet.py89
-rw-r--r--hypervideo_dl/extractor/moevideo.py79
-rw-r--r--hypervideo_dl/extractor/mofosex.py79
-rw-r--r--hypervideo_dl/extractor/mojvideo.py58
-rw-r--r--hypervideo_dl/extractor/morningstar.py50
-rw-r--r--hypervideo_dl/extractor/motherless.py232
-rw-r--r--hypervideo_dl/extractor/motorsport.py49
-rw-r--r--hypervideo_dl/extractor/movieclips.py49
-rw-r--r--hypervideo_dl/extractor/moviezine.py45
-rw-r--r--hypervideo_dl/extractor/movingimage.py52
-rw-r--r--hypervideo_dl/extractor/msn.py171
-rw-r--r--hypervideo_dl/extractor/mtv.py488
-rw-r--r--hypervideo_dl/extractor/muenchentv.py75
-rw-r--r--hypervideo_dl/extractor/mwave.py90
-rw-r--r--hypervideo_dl/extractor/mychannels.py40
-rw-r--r--hypervideo_dl/extractor/myspace.py212
-rw-r--r--hypervideo_dl/extractor/myspass.py56
-rw-r--r--hypervideo_dl/extractor/myvi.py111
-rw-r--r--hypervideo_dl/extractor/myvidster.py29
-rw-r--r--hypervideo_dl/extractor/nationalgeographic.py82
-rw-r--r--hypervideo_dl/extractor/naver.py166
-rw-r--r--hypervideo_dl/extractor/nba.py428
-rw-r--r--hypervideo_dl/extractor/nbc.py525
-rw-r--r--hypervideo_dl/extractor/ndr.py440
-rw-r--r--hypervideo_dl/extractor/ndtv.py115
-rw-r--r--hypervideo_dl/extractor/nerdcubed.py36
-rw-r--r--hypervideo_dl/extractor/neteasemusic.py485
-rw-r--r--hypervideo_dl/extractor/netzkino.py89
-rw-r--r--hypervideo_dl/extractor/newgrounds.py168
-rw-r--r--hypervideo_dl/extractor/newstube.py83
-rw-r--r--hypervideo_dl/extractor/nextmedia.py238
-rw-r--r--hypervideo_dl/extractor/nexx.py453
-rw-r--r--hypervideo_dl/extractor/nfl.py160
-rw-r--r--hypervideo_dl/extractor/nhk.py178
-rw-r--r--hypervideo_dl/extractor/nhl.py128
-rw-r--r--hypervideo_dl/extractor/nick.py249
-rw-r--r--hypervideo_dl/extractor/niconico.py515
-rw-r--r--hypervideo_dl/extractor/ninecninemedia.py102
-rw-r--r--hypervideo_dl/extractor/ninegag.py130
-rw-r--r--hypervideo_dl/extractor/ninenow.py93
-rw-r--r--hypervideo_dl/extractor/nintendo.py60
-rw-r--r--hypervideo_dl/extractor/njpwworld.py100
-rw-r--r--hypervideo_dl/extractor/nobelprize.py62
-rw-r--r--hypervideo_dl/extractor/nonktube.py38
-rw-r--r--hypervideo_dl/extractor/noovo.py104
-rw-r--r--hypervideo_dl/extractor/normalboots.py54
-rw-r--r--hypervideo_dl/extractor/nosvideo.py75
-rw-r--r--hypervideo_dl/extractor/nova.py305
-rw-r--r--hypervideo_dl/extractor/nowness.py147
-rw-r--r--hypervideo_dl/extractor/noz.py89
-rw-r--r--hypervideo_dl/extractor/npo.py767
-rw-r--r--hypervideo_dl/extractor/npr.py124
-rw-r--r--hypervideo_dl/extractor/nrk.py873
-rw-r--r--hypervideo_dl/extractor/nrl.py30
-rw-r--r--hypervideo_dl/extractor/ntvcojp.py49
-rw-r--r--hypervideo_dl/extractor/ntvde.py77
-rw-r--r--hypervideo_dl/extractor/ntvru.py131
-rw-r--r--hypervideo_dl/extractor/nuevo.py39
-rw-r--r--hypervideo_dl/extractor/nuvid.py71
-rw-r--r--hypervideo_dl/extractor/nytimes.py261
-rw-r--r--hypervideo_dl/extractor/nzz.py43
-rw-r--r--hypervideo_dl/extractor/odatv.py50
-rw-r--r--hypervideo_dl/extractor/odnoklassniki.py268
-rw-r--r--hypervideo_dl/extractor/oktoberfesttv.py47
-rw-r--r--hypervideo_dl/extractor/once.py43
-rw-r--r--hypervideo_dl/extractor/ondemandkorea.py62
-rw-r--r--hypervideo_dl/extractor/onet.py268
-rw-r--r--hypervideo_dl/extractor/onionstudios.py53
-rw-r--r--hypervideo_dl/extractor/ooyala.py210
-rw-r--r--hypervideo_dl/extractor/openload.py238
-rw-r--r--hypervideo_dl/extractor/ora.py75
-rw-r--r--hypervideo_dl/extractor/orf.py589
-rw-r--r--hypervideo_dl/extractor/outsidetv.py28
-rw-r--r--hypervideo_dl/extractor/packtpub.py164
-rw-r--r--hypervideo_dl/extractor/palcomp3.py148
-rw-r--r--hypervideo_dl/extractor/pandoratv.py134
-rw-r--r--hypervideo_dl/extractor/parliamentliveuk.py43
-rw-r--r--hypervideo_dl/extractor/patreon.py156
-rw-r--r--hypervideo_dl/extractor/pbs.py710
-rw-r--r--hypervideo_dl/extractor/pearvideo.py63
-rw-r--r--hypervideo_dl/extractor/peertube.py628
-rw-r--r--hypervideo_dl/extractor/people.py32
-rw-r--r--hypervideo_dl/extractor/performgroup.py83
-rw-r--r--hypervideo_dl/extractor/periscope.py189
-rw-r--r--hypervideo_dl/extractor/philharmoniedeparis.py106
-rw-r--r--hypervideo_dl/extractor/phoenix.py133
-rw-r--r--hypervideo_dl/extractor/photobucket.py46
-rw-r--r--hypervideo_dl/extractor/picarto.py127
-rw-r--r--hypervideo_dl/extractor/piksel.py187
-rw-r--r--hypervideo_dl/extractor/pinkbike.py97
-rw-r--r--hypervideo_dl/extractor/pinterest.py203
-rw-r--r--hypervideo_dl/extractor/pladform.py125
-rw-r--r--hypervideo_dl/extractor/platzi.py224
-rw-r--r--hypervideo_dl/extractor/playfm.py75
-rw-r--r--hypervideo_dl/extractor/playplustv.py109
-rw-r--r--hypervideo_dl/extractor/plays.py53
-rw-r--r--hypervideo_dl/extractor/playstuff.py65
-rw-r--r--hypervideo_dl/extractor/playtvak.py191
-rw-r--r--hypervideo_dl/extractor/playvid.py99
-rw-r--r--hypervideo_dl/extractor/playwire.py75
-rw-r--r--hypervideo_dl/extractor/pluralsight.py501
-rw-r--r--hypervideo_dl/extractor/podomatic.py76
-rw-r--r--hypervideo_dl/extractor/pokemon.py71
-rw-r--r--hypervideo_dl/extractor/polskieradio.py180
-rw-r--r--hypervideo_dl/extractor/popcorntimes.py99
-rw-r--r--hypervideo_dl/extractor/popcorntv.py76
-rw-r--r--hypervideo_dl/extractor/porn91.py63
-rw-r--r--hypervideo_dl/extractor/porncom.py103
-rw-r--r--hypervideo_dl/extractor/pornhd.py121
-rw-r--r--hypervideo_dl/extractor/pornhub.py745
-rw-r--r--hypervideo_dl/extractor/pornotube.py85
-rw-r--r--hypervideo_dl/extractor/pornovoisines.py108
-rw-r--r--hypervideo_dl/extractor/pornoxo.py58
-rw-r--r--hypervideo_dl/extractor/presstv.py74
-rw-r--r--hypervideo_dl/extractor/prosiebensat1.py500
-rw-r--r--hypervideo_dl/extractor/puhutv.py239
-rw-r--r--hypervideo_dl/extractor/puls4.py57
-rw-r--r--hypervideo_dl/extractor/pyvideo.py72
-rw-r--r--hypervideo_dl/extractor/qqmusic.py369
-rw-r--r--hypervideo_dl/extractor/r7.py112
-rw-r--r--hypervideo_dl/extractor/radiobremen.py63
-rw-r--r--hypervideo_dl/extractor/radiocanada.py171
-rw-r--r--hypervideo_dl/extractor/radiode.py52
-rw-r--r--hypervideo_dl/extractor/radiofrance.py59
-rw-r--r--hypervideo_dl/extractor/radiojavan.py83
-rw-r--r--hypervideo_dl/extractor/rai.py487
-rw-r--r--hypervideo_dl/extractor/raywenderlich.py179
-rw-r--r--hypervideo_dl/extractor/rbmaradio.py72
-rw-r--r--hypervideo_dl/extractor/rds.py70
-rw-r--r--hypervideo_dl/extractor/redbulltv.py231
-rw-r--r--hypervideo_dl/extractor/reddit.py161
-rw-r--r--hypervideo_dl/extractor/redtube.py136
-rw-r--r--hypervideo_dl/extractor/regiotv.py62
-rw-r--r--hypervideo_dl/extractor/rentv.py106
-rw-r--r--hypervideo_dl/extractor/restudy.py44
-rw-r--r--hypervideo_dl/extractor/reuters.py69
-rw-r--r--hypervideo_dl/extractor/reverbnation.py53
-rw-r--r--hypervideo_dl/extractor/rice.py116
-rw-r--r--hypervideo_dl/extractor/rmcdecouverte.py55
-rw-r--r--hypervideo_dl/extractor/ro220.py43
-rw-r--r--hypervideo_dl/extractor/rockstargames.py69
-rw-r--r--hypervideo_dl/extractor/roosterteeth.py137
-rw-r--r--hypervideo_dl/extractor/rottentomatoes.py32
-rw-r--r--hypervideo_dl/extractor/roxwel.py53
-rw-r--r--hypervideo_dl/extractor/rozhlas.py50
-rw-r--r--hypervideo_dl/extractor/rtbf.py161
-rw-r--r--hypervideo_dl/extractor/rte.py167
-rw-r--r--hypervideo_dl/extractor/rtl2.py207
-rw-r--r--hypervideo_dl/extractor/rtlnl.py146
-rw-r--r--hypervideo_dl/extractor/rtp.py66
-rw-r--r--hypervideo_dl/extractor/rts.py235
-rw-r--r--hypervideo_dl/extractor/rtve.py268
-rw-r--r--hypervideo_dl/extractor/rtvnh.py62
-rw-r--r--hypervideo_dl/extractor/rtvs.py47
-rw-r--r--hypervideo_dl/extractor/ruhd.py45
-rw-r--r--hypervideo_dl/extractor/rumble.py67
-rw-r--r--hypervideo_dl/extractor/rutube.py313
-rw-r--r--hypervideo_dl/extractor/rutv.py211
-rw-r--r--hypervideo_dl/extractor/ruutu.py227
-rw-r--r--hypervideo_dl/extractor/ruv.py101
-rw-r--r--hypervideo_dl/extractor/safari.py264
-rw-r--r--hypervideo_dl/extractor/samplefocus.py100
-rw-r--r--hypervideo_dl/extractor/sapo.py119
-rw-r--r--hypervideo_dl/extractor/savefrom.py34
-rw-r--r--hypervideo_dl/extractor/sbs.py78
-rw-r--r--hypervideo_dl/extractor/screencast.py123
-rw-r--r--hypervideo_dl/extractor/screencastomatic.py51
-rw-r--r--hypervideo_dl/extractor/scrippsnetworks.py152
-rw-r--r--hypervideo_dl/extractor/scte.py144
-rw-r--r--hypervideo_dl/extractor/seeker.py58
-rw-r--r--hypervideo_dl/extractor/senateisvp.py153
-rw-r--r--hypervideo_dl/extractor/sendtonews.py105
-rw-r--r--hypervideo_dl/extractor/servus.py148
-rw-r--r--hypervideo_dl/extractor/sevenplus.py94
-rw-r--r--hypervideo_dl/extractor/sexu.py63
-rw-r--r--hypervideo_dl/extractor/seznamzpravy.py169
-rw-r--r--hypervideo_dl/extractor/shahid.py225
-rw-r--r--hypervideo_dl/extractor/shared.py141
-rw-r--r--hypervideo_dl/extractor/showroomlive.py84
-rw-r--r--hypervideo_dl/extractor/simplecast.py160
-rw-r--r--hypervideo_dl/extractor/sina.py115
-rw-r--r--hypervideo_dl/extractor/sixplay.py129
-rw-r--r--hypervideo_dl/extractor/sky.py131
-rw-r--r--hypervideo_dl/extractor/skyit.py239
-rw-r--r--hypervideo_dl/extractor/skylinewebcams.py42
-rw-r--r--hypervideo_dl/extractor/skynewsarabia.py117
-rw-r--r--hypervideo_dl/extractor/slideshare.py56
-rw-r--r--hypervideo_dl/extractor/slideslive.py109
-rw-r--r--hypervideo_dl/extractor/slutload.py65
-rw-r--r--hypervideo_dl/extractor/snotr.py73
-rw-r--r--hypervideo_dl/extractor/sohu.py202
-rw-r--r--hypervideo_dl/extractor/sonyliv.py112
-rw-r--r--hypervideo_dl/extractor/soundcloud.py815
-rw-r--r--hypervideo_dl/extractor/soundgasm.py77
-rw-r--r--hypervideo_dl/extractor/southpark.py127
-rw-r--r--hypervideo_dl/extractor/spankbang.py198
-rw-r--r--hypervideo_dl/extractor/spankwire.py182
-rw-r--r--hypervideo_dl/extractor/spiegel.py54
-rw-r--r--hypervideo_dl/extractor/spike.py48
-rw-r--r--hypervideo_dl/extractor/sport5.py92
-rw-r--r--hypervideo_dl/extractor/sportbox.py99
-rw-r--r--hypervideo_dl/extractor/sportdeutschland.py105
-rw-r--r--hypervideo_dl/extractor/spotify.py156
-rw-r--r--hypervideo_dl/extractor/spreaker.py176
-rw-r--r--hypervideo_dl/extractor/springboardplatform.py125
-rw-r--r--hypervideo_dl/extractor/sprout.py64
-rw-r--r--hypervideo_dl/extractor/srgssr.py252
-rw-r--r--hypervideo_dl/extractor/srmediathek.py59
-rw-r--r--hypervideo_dl/extractor/stanfordoc.py91
-rw-r--r--hypervideo_dl/extractor/steam.py149
-rw-r--r--hypervideo_dl/extractor/stitcher.py144
-rw-r--r--hypervideo_dl/extractor/storyfire.py151
-rw-r--r--hypervideo_dl/extractor/streamable.py112
-rw-r--r--hypervideo_dl/extractor/streamcloud.py78
-rw-r--r--hypervideo_dl/extractor/streamcz.py105
-rw-r--r--hypervideo_dl/extractor/streetvoice.py100
-rw-r--r--hypervideo_dl/extractor/stretchinternet.py37
-rw-r--r--hypervideo_dl/extractor/stv.py95
-rw-r--r--hypervideo_dl/extractor/sunporno.py79
-rw-r--r--hypervideo_dl/extractor/sverigesradio.py115
-rw-r--r--hypervideo_dl/extractor/svt.py425
-rw-r--r--hypervideo_dl/extractor/swrmediathek.py115
-rw-r--r--hypervideo_dl/extractor/syfy.py58
-rw-r--r--hypervideo_dl/extractor/sztvhu.py41
-rw-r--r--hypervideo_dl/extractor/tagesschau.py311
-rw-r--r--hypervideo_dl/extractor/tass.py62
-rw-r--r--hypervideo_dl/extractor/tbs.py89
-rw-r--r--hypervideo_dl/extractor/tdslifeway.py33
-rw-r--r--hypervideo_dl/extractor/teachable.py298
-rw-r--r--hypervideo_dl/extractor/teachertube.py129
-rw-r--r--hypervideo_dl/extractor/teachingchannel.py33
-rw-r--r--hypervideo_dl/extractor/teamcoco.py205
-rw-r--r--hypervideo_dl/extractor/teamtreehouse.py140
-rw-r--r--hypervideo_dl/extractor/techtalks.py82
-rw-r--r--hypervideo_dl/extractor/ted.py367
-rw-r--r--hypervideo_dl/extractor/tele13.py88
-rw-r--r--hypervideo_dl/extractor/tele5.py108
-rw-r--r--hypervideo_dl/extractor/telebruxelles.py76
-rw-r--r--hypervideo_dl/extractor/telecinco.py151
-rw-r--r--hypervideo_dl/extractor/telegraaf.py89
-rw-r--r--hypervideo_dl/extractor/telemb.py78
-rw-r--r--hypervideo_dl/extractor/telequebec.py252
-rw-r--r--hypervideo_dl/extractor/teletask.py53
-rw-r--r--hypervideo_dl/extractor/telewebion.py55
-rw-r--r--hypervideo_dl/extractor/tennistv.py112
-rw-r--r--hypervideo_dl/extractor/tenplay.py70
-rw-r--r--hypervideo_dl/extractor/testurl.py64
-rw-r--r--hypervideo_dl/extractor/tf1.py87
-rw-r--r--hypervideo_dl/extractor/tfo.py55
-rw-r--r--hypervideo_dl/extractor/theintercept.py49
-rw-r--r--hypervideo_dl/extractor/theplatform.py414
-rw-r--r--hypervideo_dl/extractor/thescene.py44
-rw-r--r--hypervideo_dl/extractor/thestar.py36
-rw-r--r--hypervideo_dl/extractor/thesun.py38
-rw-r--r--hypervideo_dl/extractor/theweatherchannel.py102
-rw-r--r--hypervideo_dl/extractor/thisamericanlife.py40
-rw-r--r--hypervideo_dl/extractor/thisav.py73
-rw-r--r--hypervideo_dl/extractor/thisoldhouse.py47
-rw-r--r--hypervideo_dl/extractor/threeqsdn.py164
-rw-r--r--hypervideo_dl/extractor/tiktok.py147
-rw-r--r--hypervideo_dl/extractor/tinypic.py56
-rw-r--r--hypervideo_dl/extractor/tmz.py111
-rw-r--r--hypervideo_dl/extractor/tnaflix.py327
-rw-r--r--hypervideo_dl/extractor/toggle.py234
-rw-r--r--hypervideo_dl/extractor/tonline.py59
-rw-r--r--hypervideo_dl/extractor/toongoggles.py81
-rw-r--r--hypervideo_dl/extractor/toutv.py93
-rw-r--r--hypervideo_dl/extractor/toypics.py90
-rw-r--r--hypervideo_dl/extractor/traileraddict.py64
-rw-r--r--hypervideo_dl/extractor/trilulilu.py103
-rw-r--r--hypervideo_dl/extractor/trovo.py194
-rw-r--r--hypervideo_dl/extractor/trunews.py34
-rw-r--r--hypervideo_dl/extractor/trutv.py75
-rw-r--r--hypervideo_dl/extractor/tube8.py86
-rw-r--r--hypervideo_dl/extractor/tubitv.py110
-rw-r--r--hypervideo_dl/extractor/tudou.py49
-rw-r--r--hypervideo_dl/extractor/tumblr.py213
-rw-r--r--hypervideo_dl/extractor/tunein.py183
-rw-r--r--hypervideo_dl/extractor/tunepk.py90
-rw-r--r--hypervideo_dl/extractor/turbo.py68
-rw-r--r--hypervideo_dl/extractor/turner.py260
-rw-r--r--hypervideo_dl/extractor/tv2.py248
-rw-r--r--hypervideo_dl/extractor/tv2dk.py165
-rw-r--r--hypervideo_dl/extractor/tv2hu.py62
-rw-r--r--hypervideo_dl/extractor/tv4.py128
-rw-r--r--hypervideo_dl/extractor/tv5mondeplus.py117
-rw-r--r--hypervideo_dl/extractor/tv5unis.py121
-rw-r--r--hypervideo_dl/extractor/tva.py88
-rw-r--r--hypervideo_dl/extractor/tvanouvelles.py65
-rw-r--r--hypervideo_dl/extractor/tvc.py109
-rw-r--r--hypervideo_dl/extractor/tver.py61
-rw-r--r--hypervideo_dl/extractor/tvigle.py138
-rw-r--r--hypervideo_dl/extractor/tvland.py37
-rw-r--r--hypervideo_dl/extractor/tvn24.py103
-rw-r--r--hypervideo_dl/extractor/tvnet.py147
-rw-r--r--hypervideo_dl/extractor/tvnoe.py48
-rw-r--r--hypervideo_dl/extractor/tvnow.py486
-rw-r--r--hypervideo_dl/extractor/tvp.py252
-rw-r--r--hypervideo_dl/extractor/tvplay.py492
-rw-r--r--hypervideo_dl/extractor/tvplayer.py86
-rw-r--r--hypervideo_dl/extractor/tweakers.py62
-rw-r--r--hypervideo_dl/extractor/twentyfourvideo.py133
-rw-r--r--hypervideo_dl/extractor/twentymin.py91
-rw-r--r--hypervideo_dl/extractor/twentythreevideo.py80
-rw-r--r--hypervideo_dl/extractor/twitcasting.py111
-rw-r--r--hypervideo_dl/extractor/twitch.py988
-rw-r--r--hypervideo_dl/extractor/twitter.py669
-rw-r--r--hypervideo_dl/extractor/udemy.py481
-rw-r--r--hypervideo_dl/extractor/udn.py102
-rw-r--r--hypervideo_dl/extractor/ufctv.py16
-rw-r--r--hypervideo_dl/extractor/uktvplay.py36
-rw-r--r--hypervideo_dl/extractor/umg.py103
-rw-r--r--hypervideo_dl/extractor/unistra.py67
-rw-r--r--hypervideo_dl/extractor/unity.py32
-rw-r--r--hypervideo_dl/extractor/uol.py144
-rw-r--r--hypervideo_dl/extractor/uplynk.py70
-rw-r--r--hypervideo_dl/extractor/urort.py66
-rw-r--r--hypervideo_dl/extractor/urplay.py107
-rw-r--r--hypervideo_dl/extractor/usanetwork.py24
-rw-r--r--hypervideo_dl/extractor/usatoday.py63
-rw-r--r--hypervideo_dl/extractor/ustream.py284
-rw-r--r--hypervideo_dl/extractor/ustudio.py125
-rw-r--r--hypervideo_dl/extractor/varzesh3.py79
-rw-r--r--hypervideo_dl/extractor/vbox7.py105
-rw-r--r--hypervideo_dl/extractor/veehd.py118
-rw-r--r--hypervideo_dl/extractor/veoh.py103
-rw-r--r--hypervideo_dl/extractor/vesti.py121
-rw-r--r--hypervideo_dl/extractor/vevo.py374
-rw-r--r--hypervideo_dl/extractor/vgtv.py313
-rw-r--r--hypervideo_dl/extractor/vh1.py41
-rw-r--r--hypervideo_dl/extractor/vice.py337
-rw-r--r--hypervideo_dl/extractor/vidbit.py84
-rw-r--r--hypervideo_dl/extractor/viddler.py138
-rw-r--r--hypervideo_dl/extractor/videa.py173
-rw-r--r--hypervideo_dl/extractor/videodetective.py29
-rw-r--r--hypervideo_dl/extractor/videofyme.py52
-rw-r--r--hypervideo_dl/extractor/videomore.py322
-rw-r--r--hypervideo_dl/extractor/videopress.py100
-rw-r--r--hypervideo_dl/extractor/vidio.py89
-rw-r--r--hypervideo_dl/extractor/vidlii.py125
-rw-r--r--hypervideo_dl/extractor/vidme.py295
-rw-r--r--hypervideo_dl/extractor/vier.py264
-rw-r--r--hypervideo_dl/extractor/viewlift.py250
-rw-r--r--hypervideo_dl/extractor/viidea.py202
-rw-r--r--hypervideo_dl/extractor/viki.py433
-rw-r--r--hypervideo_dl/extractor/vimeo.py1158
-rw-r--r--hypervideo_dl/extractor/vimple.py61
-rw-r--r--hypervideo_dl/extractor/vine.py154
-rw-r--r--hypervideo_dl/extractor/viqeo.py99
-rw-r--r--hypervideo_dl/extractor/viu.py272
-rw-r--r--hypervideo_dl/extractor/vk.py689
-rw-r--r--hypervideo_dl/extractor/vlive.py328
-rw-r--r--hypervideo_dl/extractor/vodlocker.py80
-rw-r--r--hypervideo_dl/extractor/vodpl.py32
-rw-r--r--hypervideo_dl/extractor/vodplatform.py40
-rw-r--r--hypervideo_dl/extractor/voicerepublic.py62
-rw-r--r--hypervideo_dl/extractor/voot.py100
-rw-r--r--hypervideo_dl/extractor/voxmedia.py225
-rw-r--r--hypervideo_dl/extractor/vrak.py80
-rw-r--r--hypervideo_dl/extractor/vrt.py87
-rw-r--r--hypervideo_dl/extractor/vrv.py277
-rw-r--r--hypervideo_dl/extractor/vshare.py74
-rw-r--r--hypervideo_dl/extractor/vtm.py62
-rw-r--r--hypervideo_dl/extractor/vube.py172
-rw-r--r--hypervideo_dl/extractor/vuclip.py70
-rw-r--r--hypervideo_dl/extractor/vvvvid.py284
-rw-r--r--hypervideo_dl/extractor/vyborymos.py55
-rw-r--r--hypervideo_dl/extractor/vzaar.py112
-rw-r--r--hypervideo_dl/extractor/wakanim.py66
-rw-r--r--hypervideo_dl/extractor/walla.py86
-rw-r--r--hypervideo_dl/extractor/washingtonpost.py116
-rw-r--r--hypervideo_dl/extractor/wat.py106
-rw-r--r--hypervideo_dl/extractor/watchbox.py161
-rw-r--r--hypervideo_dl/extractor/watchindianporn.py68
-rw-r--r--hypervideo_dl/extractor/wdr.py347
-rw-r--r--hypervideo_dl/extractor/webcaster.py102
-rw-r--r--hypervideo_dl/extractor/webofstories.py160
-rw-r--r--hypervideo_dl/extractor/weibo.py140
-rw-r--r--hypervideo_dl/extractor/weiqitv.py52
-rw-r--r--hypervideo_dl/extractor/wistia.py199
-rw-r--r--hypervideo_dl/extractor/worldstarhiphop.py40
-rw-r--r--hypervideo_dl/extractor/wsj.py123
-rw-r--r--hypervideo_dl/extractor/wwe.py140
-rw-r--r--hypervideo_dl/extractor/xbef.py44
-rw-r--r--hypervideo_dl/extractor/xboxclips.py68
-rw-r--r--hypervideo_dl/extractor/xfileshare.py201
-rw-r--r--hypervideo_dl/extractor/xhamster.py450
-rw-r--r--hypervideo_dl/extractor/xiami.py201
-rw-r--r--hypervideo_dl/extractor/ximalaya.py233
-rw-r--r--hypervideo_dl/extractor/xminus.py79
-rw-r--r--hypervideo_dl/extractor/xnxx.py84
-rw-r--r--hypervideo_dl/extractor/xstream.py119
-rw-r--r--hypervideo_dl/extractor/xtube.py233
-rw-r--r--hypervideo_dl/extractor/xuite.py153
-rw-r--r--hypervideo_dl/extractor/xvideos.py147
-rw-r--r--hypervideo_dl/extractor/xxxymovies.py81
-rw-r--r--hypervideo_dl/extractor/yahoo.py569
-rw-r--r--hypervideo_dl/extractor/yandexdisk.py147
-rw-r--r--hypervideo_dl/extractor/yandexmusic.py459
-rw-r--r--hypervideo_dl/extractor/yandexvideo.py144
-rw-r--r--hypervideo_dl/extractor/yapfiles.py101
-rw-r--r--hypervideo_dl/extractor/yesjapan.py62
-rw-r--r--hypervideo_dl/extractor/yinyuetai.py56
-rw-r--r--hypervideo_dl/extractor/ynet.py52
-rw-r--r--hypervideo_dl/extractor/youjizz.py95
-rw-r--r--hypervideo_dl/extractor/youku.py309
-rw-r--r--hypervideo_dl/extractor/younow.py202
-rw-r--r--hypervideo_dl/extractor/youporn.py184
-rw-r--r--hypervideo_dl/extractor/yourporn.py67
-rw-r--r--hypervideo_dl/extractor/yourupload.py46
-rw-r--r--hypervideo_dl/extractor/youtube.py3239
-rw-r--r--hypervideo_dl/extractor/zapiks.py109
-rw-r--r--hypervideo_dl/extractor/zattoo.py433
-rw-r--r--hypervideo_dl/extractor/zdf.py378
-rw-r--r--hypervideo_dl/extractor/zhihu.py69
-rw-r--r--hypervideo_dl/extractor/zingmp3.py161
-rw-r--r--hypervideo_dl/extractor/zoom.py68
-rw-r--r--hypervideo_dl/extractor/zype.py145
-rw-r--r--hypervideo_dl/jsinterp.py262
-rw-r--r--hypervideo_dl/options.py916
-rw-r--r--hypervideo_dl/postprocessor/__init__.py40
-rw-r--r--hypervideo_dl/postprocessor/common.py69
-rw-r--r--hypervideo_dl/postprocessor/embedthumbnail.py130
-rw-r--r--hypervideo_dl/postprocessor/execafterdownload.py31
-rw-r--r--hypervideo_dl/postprocessor/ffmpeg.py657
-rw-r--r--hypervideo_dl/postprocessor/metadatafromtitle.py48
-rw-r--r--hypervideo_dl/postprocessor/xattrpp.py79
-rw-r--r--hypervideo_dl/socks.py273
-rw-r--r--hypervideo_dl/utils.py5774
-rw-r--r--hypervideo_dl/version.py3
-rw-r--r--setup.cfg6
-rw-r--r--setup.py148
-rw-r--r--test/__init__.py0
-rw-r--r--test/helper.py282
-rw-r--r--test/parameters.json43
-rw-r--r--test/test_InfoExtractor.py1132
-rw-r--r--test/test_YoutubeDL.py1002
-rw-r--r--test/test_YoutubeDLCookieJar.py51
-rw-r--r--test/test_aes.py63
-rw-r--r--test/test_age_restriction.py50
-rw-r--r--test/test_all_urls.py126
-rw-r--r--test/test_cache.py59
-rw-r--r--test/test_compat.py126
-rw-r--r--test/test_download.py265
-rw-r--r--test/test_downloader_http.py115
-rw-r--r--test/test_execution.py54
-rw-r--r--test/test_http.py166
-rw-r--r--test/test_netrc.py26
-rw-r--r--test/test_options.py26
-rw-r--r--test/test_postprocessors.py17
-rw-r--r--test/test_socks.py118
-rw-r--r--test/test_subtitles.py353
-rw-r--r--test/test_unicode_literals.py63
-rw-r--r--test/test_utils.py1480
-rw-r--r--test/test_verbose_output.py71
-rw-r--r--test/test_write_annotations.py80
-rw-r--r--test/test_youtube_lists.py80
-rw-r--r--test/test_youtube_misc.py26
-rw-r--r--test/testcert.pem52
-rw-r--r--test/testdata/cookies/httponly_cookies.txt6
-rw-r--r--test/testdata/cookies/malformed_cookies.txt9
-rw-r--r--test/testdata/cookies/session_cookies.txt6
-rw-r--r--test/testdata/f4m/custom_base_url.f4m10
-rw-r--r--test/testdata/m3u8/pluzz_francetv_11507.m3u814
-rw-r--r--test/testdata/m3u8/teamcoco_11995.m3u816
-rw-r--r--test/testdata/m3u8/ted_18923.m3u828
-rw-r--r--test/testdata/m3u8/toggle_mobile_12211.m3u813
-rw-r--r--test/testdata/m3u8/twitch_vod.m3u820
-rw-r--r--test/testdata/m3u8/vidio.m3u810
-rw-r--r--test/testdata/mpd/float_duration.mpd18
-rw-r--r--test/testdata/mpd/unfragmented.mpd28
-rw-r--r--test/testdata/mpd/urls_only.mpd218
-rw-r--r--test/testdata/xspf/foo_xspf.xspf34
-rw-r--r--tox.ini13
901 files changed, 161753 insertions, 0 deletions
diff --git a/.build.yml b/.build.yml
new file mode 100644
index 0000000..223a842
--- /dev/null
+++ b/.build.yml
@@ -0,0 +1,31 @@
+image: debian/sid
+packages:
+ - jython
+ - python3-pip
+ - virtualenv
+tasks:
+ - core: |
+ cd hypervideo
+ virtualenv -p python3 venv
+ source venv/bin/activate
+ python --version
+ pip install nose
+ export YTDL_TEST_SET=core
+ export JYTHON=true;
+ bash ./devscripts/run_tests.sh || true
+ - download: |
+ cd hypervideo
+ virtualenv -p python3 venv
+ source venv/bin/activate
+ python --version
+ pip install nose
+ export YTDL_TEST_SET=download
+ export JYTHON=true;
+ bash ./devscripts/run_tests.sh || true
+ - flake8: |
+ cd hypervideo
+ virtualenv -p python3 venv
+ source venv/bin/activate
+ python --version
+ pip install flake8
+ flake8 .
diff --git a/.drone.yml b/.drone.yml
new file mode 100644
index 0000000..3c719be
--- /dev/null
+++ b/.drone.yml
@@ -0,0 +1,20 @@
+kind: pipeline
+name: default
+
+steps:
+- name: test
+ image: debian:bullseye
+ commands:
+ - apt update -y
+ - apt install -y python3-pip jython
+ - pip3 install nose flake8
+ # Core
+ - export YTDL_TEST_SET=core
+ - export JYTHON=true;
+ - bash ./devscripts/run_tests.sh || true
+ # Download
+ - export YTDL_TEST_SET=download
+ - export JYTHON=true;
+ - bash ./devscripts/run_tests.sh || true
+ # Check syntax
+ - flake8 .
diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..2b9db86
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,3 @@
+[flake8]
+exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,setup.py,build,.git,venv
+ignore = W503,W504,E402,E501,E731,E741
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..30337c9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,101 @@
+*.pyc
+*.pyo
+*.class
+*~
+*.DS_Store
+wine-py2exe/
+py2exe.log
+*.kate-swp
+build/
+dist/
+MANIFEST
+README.txt
+hypervideo.1
+hypervideo.bash-completion
+hypervideo.fish
+youtube_dl/extractor/lazy_extractors.py
+hypervideo
+hypervideo.exe
+hypervideo.tar.gz
+.coverage
+cover/
+updates_key.pem
+*.egg-info
+*.srt
+*.ttml
+*.sbv
+*.vtt
+*.flv
+*.mp4
+*.m4a
+*.m4v
+*.mp3
+*.3gp
+*.wav
+*.webm
+*.ape
+*.mkv
+*.swf
+*.part
+*.ytdl
+*.swp
+test/local_parameters.json
+.tox
+hypervideo.zsh
+
+# IntelliJ related files
+.idea
+*.iml
+
+tmp/
+venv/
+
+# VS Code related files
+.vscode
+
+# Docs
+docs/_build/
+
+# Ignore nonfree JS or SWF bytecode
+.github/ISSUE_TEMPLATE.md
+.github/ISSUE_TEMPLATE_tmpl.md
+.github/PULL_REQUEST_TEMPLATE.md
+devscripts/create-github-release.py
+devscripts/gh-pages/add-version.py
+devscripts/gh-pages/generate-download.py
+devscripts/gh-pages/sign-versions.py
+devscripts/gh-pages/update-copyright.py
+devscripts/gh-pages/update-feed.py
+devscripts/gh-pages/update-sites.py
+devscripts/make_issue_template.py
+devscripts/show-downloads-statistics.py
+devscripts/wine-py2exe.sh
+test/swftests/.gitignore
+test/swftests/ArrayAccess.as
+test/swftests/ClassCall.as
+test/swftests/ClassConstruction.as
+test/swftests/ConstArrayAccess.as
+test/swftests/ConstantInt.as
+test/swftests/DictCall.as
+test/swftests/EqualsOperator.as
+test/swftests/LocalVars.as
+test/swftests/MemberAssignment.as
+test/swftests/NeOperator.as
+test/swftests/PrivateCall.as
+test/swftests/PrivateVoidCall.as
+test/swftests/StaticAssignment.as
+test/swftests/StaticRetrieval.as
+test/swftests/StringBasics.as
+test/swftests/StringCharCodeAt.as
+test/swftests/StringConversion.as
+test/test_iqiyi_sdk_interpreter.py
+test/test_jsinterp.py
+test/test_swfinterp.py
+test/test_update.py
+test/test_youtube_signature.py
+test/versions.json
+youtube_dl/swfinterp.py
+youtube_dl/update.py
+
+# flycheck
+flycheck_*.py
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000..8cbf5df
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,33 @@
+image: debian:sid
+
+before_script:
+- apt-get update -y
+- apt-get install -y python3-pip jython virtualenv
+
+test_core:
+ script:
+ - virtualenv -p python3 venv
+ - source venv/bin/activate
+ - python --version
+ - pip install nose
+ - export YTDL_TEST_SET=core
+ - export JYTHON=true;
+ - bash ./devscripts/run_tests.sh || true
+
+test_download:
+ script:
+ - virtualenv -p python3 venv
+ - source venv/bin/activate
+ - python --version
+ - pip install nose
+ - export YTDL_TEST_SET=download
+ - export JYTHON=true;
+ - bash ./devscripts/run_tests.sh || true
+
+test_flake8:
+ script:
+ - virtualenv -p python3 venv
+ - source venv/bin/activate
+ - python --version
+ - pip install flake8
+ - flake8 .
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..4a6d7da
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,249 @@
+Ricardo Garcia Gonzalez
+Danny Colligan
+Benjamin Johnson
+Vasyl' Vavrychuk
+Witold Baryluk
+Paweł Paprota
+Gergely Imreh
+Rogério Brito
+Philipp Hagemeister
+Sören Schulze
+Kevin Ngo
+Ori Avtalion
+shizeeg
+Filippo Valsorda
+Christian Albrecht
+Dave Vasilevsky
+Jaime Marquínez Ferrándiz
+Jeff Crouse
+Osama Khalid
+Michael Walter
+M. Yasoob Ullah Khalid
+Julien Fraichard
+Johny Mo Swag
+Axel Noack
+Albert Kim
+Pierre Rudloff
+Huarong Huo
+Ismael Mejía
+Steffan Donal
+Andras Elso
+Jelle van der Waa
+Marcin Cieślak
+Anton Larionov
+Takuya Tsuchida
+Sergey M.
+Michael Orlitzky
+Chris Gahan
+Saimadhav Heblikar
+Mike Col
+Oleg Prutz
+pulpe
+Andreas Schmitz
+Michael Kaiser
+Niklas Laxström
+David Triendl
+Anthony Weems
+David Wagner
+Juan C. Olivares
+Mattias Harrysson
+phaer
+Sainyam Kapoor
+Nicolas Évrard
+Jason Normore
+Hoje Lee
+Adam Thalhammer
+Georg Jähnig
+Ralf Haring
+Koki Takahashi
+Ariset Llerena
+Adam Malcontenti-Wilson
+Tobias Bell
+Naglis Jonaitis
+Charles Chen
+Hassaan Ali
+Dobrosław Żybort
+David Fabijan
+Sebastian Haas
+Alexander Kirk
+Erik Johnson
+Keith Beckman
+Ole Ernst
+Aaron McDaniel (mcd1992)
+Magnus Kolstad
+Hari Padmanaban
+Carlos Ramos
+5moufl
+lenaten
+Dennis Scheiba
+Damon Timm
+winwon
+Xavier Beynon
+Gabriel Schubiner
+xantares
+Jan Matějka
+Mauroy Sébastien
+William Sewell
+Dao Hoang Son
+Oskar Jauch
+Matthew Rayfield
+t0mm0
+Tithen-Firion
+Zack Fernandes
+cryptonaut
+Adrian Kretz
+Mathias Rav
+Petr Kutalek
+Will Glynn
+Max Reimann
+Cédric Luthi
+Thijs Vermeir
+Joel Leclerc
+Christopher Krooss
+Ondřej Caletka
+Dinesh S
+Johan K. Jensen
+Yen Chi Hsuan
+Enam Mijbah Noor
+David Luhmer
+Shaya Goldberg
+Paul Hartmann
+Frans de Jonge
+Robin de Rooij
+Ryan Schmidt
+Leslie P. Polzer
+Duncan Keall
+Alexander Mamay
+Devin J. Pohly
+Eduardo Ferro Aldama
+Jeff Buchbinder
+Amish Bhadeshia
+Joram Schrijver
+Will W.
+Mohammad Teimori Pabandi
+Roman Le Négrate
+Matthias Küch
+Julian Richen
+Ping O.
+Mister Hat
+Peter Ding
+jackyzy823
+George Brighton
+Remita Amine
+Aurélio A. Heckert
+Bernhard Minks
+sceext
+Zach Bruggeman
+Tjark Saul
+slangangular
+Behrouz Abbasi
+ngld
+nyuszika7h
+Shaun Walbridge
+Lee Jenkins
+Anssi Hannula
+Lukáš Lalinský
+Qijiang Fan
+Rémy Léone
+Marco Ferragina
+reiv
+Muratcan Simsek
+Evan Lu
+flatgreen
+Brian Foley
+Vignesh Venkat
+Tom Gijselinck
+Founder Fang
+Andrew Alexeyew
+Saso Bezlaj
+Erwin de Haan
+Jens Wille
+Robin Houtevelts
+Patrick Griffis
+Aidan Rowe
+mutantmonkey
+Ben Congdon
+Kacper Michajłow
+José Joaquín Atria
+Viťas Strádal
+Kagami Hiiragi
+Philip Huppert
+blahgeek
+Kevin Deldycke
+inondle
+Tomáš Čech
+Déstin Reed
+Roman Tsiupa
+Artur Krysiak
+Jakub Adam Wieczorek
+Aleksandar Topuzović
+Nehal Patel
+Rob van Bekkum
+Petr Zvoníček
+Pratyush Singh
+Aleksander Nitecki
+Sebastian Blunt
+Matěj Cepl
+Xie Yanbo
+Philip Xu
+John Hawkinson
+Rich Leeper
+Zhong Jianxin
+Thor77
+Mattias Wadman
+Arjan Verwer
+Costy Petrisor
+Logan B
+Alex Seiler
+Vijay Singh
+Paul Hartmann
+Stephen Chen
+Fabian Stahl
+Bagira
+Odd Stråbø
+Philip Herzog
+Thomas Christlieb
+Marek Rusinowski
+Tobias Gruetzmacher
+Olivier Bilodeau
+Lars Vierbergen
+Juanjo Benages
+Xiao Di Guan
+Thomas Winant
+Daniel Twardowski
+Jeremie Jarosh
+Gerard Rovira
+Marvin Ewald
+Frédéric Bournival
+Timendum
+gritstub
+Adam Voss
+Mike Fährmann
+Jan Kundrát
+Giuseppe Fabiano
+Örn Guðjónsson
+Parmjit Virk
+Genki Sky
+Ľuboš Katrinec
+Corey Nicholson
+Ashutosh Chaudhary
+John Dong
+Tatsuyuki Ishi
+Daniel Weber
+Kay Bouché
+Yang Hongbo
+Lei Wang
+Petr Novák
+Leonardo Taccari
+Martin Weinelt
+Surya Oktafendri
+TingPing
+Alexandre Macabies
+Bastian de Groot
+Niklas Haas
+András Veres-Szentkirályi
+Enes Solak
+Nathan Rossi
+Thomas van der Berg
+Luca Cherubin
+Adrian Heine
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..86a9e4c
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,435 @@
+**Please include the full output of hypervideo when run with `-v`**, i.e. **add** the `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
+```
+$ hypervideo -v <your command line>
+[debug] System config: []
+[debug] User config: []
+[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
+[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
+[debug] hypervideo version 1.1.11
+[debug] Git HEAD: 135392e
+[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2
+[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
+[debug] Proxy map: {}
+...
+```
+**Do not post screenshots of verbose logs; only plain text is acceptable.**
+
+The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
+
+Please re-read your issue to avoid a couple of common mistakes (you can and should use this as a checklist):
+
+### Is the description of the issue itself sufficient?
+
+We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
+
+So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
+
+- What the problem is
+- How it could be fixed
+- What your proposed solution would look like
+
+If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
+
+For bug reports, this means that your report should contain the *complete* output of hypervideo when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
+
+If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
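+
+As a concrete sketch (reusing the example URL from above; `log.txt` is an arbitrary file name), such a diagnostic invocation might look like:
+
+    hypervideo -v --dump-pages 'https://www.youtube.com/watch?v=BaW_jenozKc' >log.txt 2>&1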
+
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
+
+### Are you using the latest version?
+
+Before reporting any issue, type `doas pacman -Sy hypervideo`. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
+
+### Is the issue already documented?
+
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+
+### Why are existing options not enough?
+
+Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+
+### Is there enough context in your bug report?
+
+People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) into a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break the problem into two steps: one simple, and one impossible (or extremely complicated).
+
+We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
+
+### Does the issue involve one problem, and one problem only?
+
+Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
+
+In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White House podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of hypervideo that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
+
+### Is anyone going to need the feature?
+
+Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+
+### Is your question about hypervideo?
+
+It may sound strange, but some bug reports we receive are completely unrelated to hypervideo and relate to a different, or even the reporter's own, application. Please make sure that you are actually using hypervideo. If you are using a UI for hypervideo, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for hypervideo fails in some way you believe is related to hypervideo, by all means, go ahead and report the bug.
+
+# DEVELOPER INSTRUCTIONS
+
+Most users do not need to build hypervideo and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution.
+
+To run hypervideo as a developer, you don't need to build anything either. Simply execute
+
+ python -m hypervideo_dl
+
+To run the tests, simply invoke your favorite test runner, or execute a test file directly; any of the following work:
+
+ python -m unittest discover
+ python test/test_download.py
+ nosetests
+
+See item 6 of the [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor-specific test cases.
+
+If you want to create a build of hypervideo yourself, you'll need:
+
+* python
+* make (only GNU make is supported)
+* pandoc
+* zip
+* nosetests
+
+### Adding support for a new site
+
+If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. hypervideo does **not support** such sites; thus, pull requests adding support for them **will be rejected**.
+
+After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
+
+1. [Fork this repository](https://git.conocimientoslibres.ga/software/hypervideo)
+2. Check out the source code with:
+
+ git clone https://git.conocimientoslibres.ga/software/hypervideo
+
+3. Start a new git branch with
+
+ cd hypervideo
+ git checkout -b yourextractor
+
+4. Start with this simple template and save it to `hypervideo_dl/extractor/yourextractor.py`:
+
+ ```python
+ # coding: utf-8
+ from __future__ import unicode_literals
+
+ from .common import InfoExtractor
+
+
+ class YourExtractorIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://yourextractor.com/watch/42',
+ 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
+ 'info_dict': {
+ 'id': '42',
+ 'ext': 'mp4',
+ 'title': 'Video title goes here',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ # TODO more properties, either as:
+ # * A value
+ # * MD5 checksum; start the string with md5:
+ # * A regular expression; start the string with re:
+ # * Any Python type (for example int or float)
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ # TODO more code goes here, for example ...
+ title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
+ # TODO more properties (see hypervideo_dl/extractor/common.py)
+ }
+ ```
+5. Add an import in [`hypervideo_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/extractor/extractors.py).
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries (see the sketch after this list). The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not counted.
+7. Have a look at [`hypervideo_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/hypervideo_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
+8. Make sure your code follows [hypervideo coding conventions](#hypervideo-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
+
+ $ flake8 hypervideo_dl/extractor/yourextractor.py
+
+9. Make sure your code works under all [Python](https://www.python.org/) versions claimed as supported by hypervideo, namely 2.6, 2.7, and 3.2+.
+10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them, and [push](https://git-scm.com/docs/git-push) the result, like this:
+
+ $ git add hypervideo_dl/extractor/extractors.py
+ $ git add hypervideo_dl/extractor/yourextractor.py
+ $ git commit -m '[yourextractor] Add new extractor'
+ $ git push origin yourextractor
+
+11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
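+
+For item 6 above, a `_TESTS` list for the hypothetical `yourextractor` template might look like the following sketch (the URLs and values are placeholders, not a definitive layout):
+
+```python
+_TESTS = [{
+    'url': 'https://yourextractor.com/watch/42',
+    'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
+    'info_dict': {
+        'id': '42',
+        'ext': 'mp4',
+        'title': 'Video title goes here',
+    },
+}, {
+    # Tests with only_matching just check _VALID_URL matching; nothing is downloaded
+    'url': 'https://yourextractor.com/watch/43',
+    'only_matching': True,
+}]
+```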
+
+In any case, thank you very much for your contributions!
+
+## hypervideo coding conventions
+
+This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
+
+Extractors are very fragile by nature since they depend on the layout of the source data provided by third-party media hosters out of your control, and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly, but also to minimize dependency on the source's layout and even to make the code anticipate potential future changes. This is important because it allows the extractor to survive minor layout changes, thus keeping old hypervideo versions working. Even though such breakage is easily fixed by releasing a new version of hypervideo with the fix incorporated, all previous versions remain broken in all repositories and distros' packages, which may not be so prompt in fetching the update from us. Needless to say, some non-rolling-release distros may never receive an update at all.
+
+### Mandatory and optional metafields
+
+For extraction to work, hypervideo relies on the metadata your extractor extracts and provides to hypervideo, expressed as an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/hypervideo_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by hypervideo:
+
+ - `id` (media identifier)
+ - `title` (media title)
+ - `url` (media download URL) or `formats`
+
+In fact, only the last field is technically mandatory (i.e. if you can't figure out the download location of the media, the extraction does not make any sense), but by convention hypervideo also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data without which extraction does not make sense; if any of them fails to be extracted, the extractor is considered completely broken.
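+
+As an illustration, a bare-bones but valid return value from `_real_extract` could look like this sketch (the variable names are placeholders):
+
+```python
+return {
+    'id': video_id,      # mandatory: media identifier
+    'title': title,      # mandatory: media title
+    'url': video_url,    # mandatory unless a 'formats' list is provided instead
+}
+```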
+
+[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/hypervideo_dl/extractor/common.py#L188-L303) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** of situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** so as not to break the extraction of the general-purpose mandatory fields.
+
+#### Example
+
+Say you have some source dictionary `meta` that you've fetched as JSON with an HTTP request, and it has a key `summary`:
+
+```python
+meta = self._download_json(url, video_id)
+```
+
+Assume at this point `meta`'s layout is:
+
+```python
+{
+ ...
+ "summary": "some fancy summary text",
+ ...
+}
+```
+
+Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field, you should be prepared for this key to be missing from the `meta` dict, so you should extract it like this:
+
+```python
+description = meta.get('summary') # correct
+```
+
+and not like:
+
+```python
+description = meta['summary'] # incorrect
+```
+
+The latter will break the extraction process with a `KeyError` if `summary` disappears from `meta` at some later time, while with the former approach extraction will just go ahead with `description` set to `None`, which is perfectly fine (remember that `None` is equivalent to the absence of data).
+
+Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
+
+```python
+description = self._search_regex(
+ r'<span[^>]+id="title"[^>]*>([^<]+)<',
+ webpage, 'description', fatal=False)
+```
+
+With `fatal` set to `False`, if `_search_regex` fails to extract `description`, it will emit a warning and continue extraction.
+
+You can also pass `default=<some fallback value>`, for example:
+
+```python
+description = self._search_regex(
+ r'<span[^>]+id="title"[^>]*>([^<]+)<',
+ webpage, 'description', default=None)
+```
+
+On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
+
+### Provide fallbacks
+
+When extracting metadata, try to do so from multiple sources. For example, if `title` is present in several places, try extracting it from at least some of them. This makes the extractor more future-proof should some of the sources become unavailable.
+
+#### Example
+
+Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field, you should end up with something like:
+
+```python
+title = meta['title']
+```
+
+If `title` disappears from `meta` in the future due to some change on the hoster's side, the extraction will fail, since `title` is mandatory. That's expected.
+
+Assume that you have some other source you can extract `title` from, for example the `og:title` HTML meta tag of a `webpage`. In this case you can provide a fallback:
+
+```python
+title = meta.get('title') or self._og_search_title(webpage)
+```
+
+This code will try to extract `title` from `meta` first, and if that fails, it will try extracting `og:title` from the `webpage`.
+
+### Regular expressions
+
+#### Don't capture groups you don't use
+
+A capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non-capturing.
+
+##### Example
+
+Don't capture the id attribute name here, since you can't use it for anything anyway.
+
+Correct:
+
+```python
+r'(?:id|ID)=(?P<id>\d+)'
+```
+
+Incorrect:
+
+```python
+r'(id|ID)=(?P<id>\d+)'
+```
+
+
+#### Make regular expressions relaxed and flexible
+
+When using regular expressions, write them in a relaxed and flexible way, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values, and so on.
+
+##### Example
+
+Say you need to extract `title` from the following HTML code:
+
+```html
+<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
+```
+
+The code for that task should look similar to:
+
+```python
+title = self._search_regex(
+ r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
+```
+
+Or even better:
+
+```python
+title = self._search_regex(
+ r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
+ webpage, 'title', group='title')
+```
+
+Note how this tolerates potential changes in the `style` attribute's value or a switch from double quotes to single quotes for the `class` attribute.
+
+The code definitely should not look like:
+
+```python
+title = self._search_regex(
+ r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
+ webpage, 'title', group='title')
+```
+
+### Long lines policy
+
+There is a soft limit of 80 characters per line of code. This limit should be respected if possible, but not if doing so makes readability or code maintenance worse.
+
+For example, you should **never** split long string literals, like URLs or other often-copied entities, over multiple lines to fit this limit:
+
+Correct:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+Incorrect:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list='
+'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+### Inline values
+
+Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to distant parts of the extractor file, which makes the linear flow harder to read.
+
+#### Example
+
+Correct:
+
+```python
+title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+```
+
+Incorrect:
+
+```python
+TITLE_RE = r'<title>([^<]+)</title>'
+# ...some lines of code...
+title = self._html_search_regex(TITLE_RE, webpage, 'title')
+```
+
+### Collapse fallbacks
+
+Multiple fallback values can quickly become unwieldy. Collapse them into a single expression via a list of patterns.
+
+#### Example
+
+Good:
+
+```python
+description = self._html_search_meta(
+ ['og:description', 'description', 'twitter:description'],
+ webpage, 'description', default=None)
+```
+
+Unwieldy:
+
+```python
+description = (
+ self._og_search_description(webpage, default=None)
+ or self._html_search_meta('description', webpage, default=None)
+ or self._html_search_meta('twitter:description', webpage, default=None))
+```
+
+Methods supporting a list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property` and `_html_search_meta`.
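+
+For instance, `_search_regex` accepts a tuple of patterns and tries them in order, returning the first match; a sketch with illustrative patterns:
+
+```python
+title = self._search_regex(
+    (r'<h1[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
+     r'<title>(?P<title>[^<]+)</title>'),
+    webpage, 'title', group='title')
+```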
+
+### Trailing parentheses
+
+Always move trailing parentheses after the last argument.
+
+#### Example
+
+Correct:
+
+```python
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list)
+```
+
+Incorrect:
+
+```python
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list,
+)
+```
+
+### Use convenience conversion and parsing functions
+
+Wrap all extracted numeric data in safe functions from [`hypervideo_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string-to-number conversions as well.
+
+Use `url_or_none` for safe URL processing.
+
+Use `try_get` for safe metadata extraction from parsed JSON.
+
+Use the following for specific meta fields:
+
+ - `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction
+ - `unified_timestamp` for uniform `timestamp` extraction
+ - `parse_filesize` for `filesize` extraction
+ - `parse_count` for count meta fields extraction
+ - `parse_resolution` for resolution extraction
+ - `parse_duration` for `duration` extraction
+ - `parse_age_limit` for `age_limit` extraction
+
+Explore [`hypervideo_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/utils.py) for more useful convenience functions.
+
+#### More examples
+
+##### Safely extract optional description from parsed JSON
+
+```python
+description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
+```
+
+##### Safely extract more optional metadata
+
+```python
+video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+description = video.get('summary')
+duration = float_or_none(video.get('durationMs'), scale=1000)
+view_count = int_or_none(video.get('views'))
+```
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..680fffd
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,6142 @@
+version 2021.06.06
+
+Extractors
+* [facebook] Improve login required detection
+* [youporn] Fix formats and view count extraction (#29216)
+* [orf:tvthek] Fix thumbnails extraction (#29217)
+* [formula1] Fix extraction (#29206)
+* [ard] Relax URL regular expression and fix video ids (#22724, #29091)
++ [ustream] Detect https embeds (#29133)
+* [ted] Prefer own formats over external sources (#29142)
+* [twitch:clips] Improve extraction (#29149)
++ [twitch:clips] Add access token query to download URLs (#29136)
+* [youtube] Fix get_video_info request (#29086, #29165)
+* [vimeo] Fix vimeo pro embed extraction (#29126)
+* [redbulltv] Fix embed data extraction (#28770)
+* [shahid] Relax URL regular expression (#28772, #28930)
+
+
+version 2021.05.16
+
+Core
+* [options] Fix thumbnail option group name (#29042)
+* [YoutubeDL] Improve extract_info doc (#28946)
+
+Extractors
++ [playstuff] Add support for play.stuff.co.nz (#28901, #28931)
+* [eroprofile] Fix extraction (#23200, #23626, #29008)
++ [vivo] Add support for vivo.st (#29009)
++ [generic] Add support for og:audio (#28311, #29015)
+* [phoenix] Fix extraction (#29057)
++ [generic] Add support for sibnet embeds
++ [vk] Add support for sibnet embeds (#9500)
++ [generic] Add Referer header for direct videojs download URLs (#2879,
+ #20217, #29053)
+* [orf:radio] Switch download URLs to HTTPS (#29012, #29046)
+- [blinkx] Remove extractor (#28941)
+* [medaltv] Relax URL regular expression (#28884)
++ [funimation] Add support for optional lang code in URLs (#28950)
++ [gdcvault] Add support for HTML5 videos
+* [dispeak] Improve FLV extraction (#13513, #28970)
+* [kaltura] Improve iframe extraction (#28969)
+* [kaltura] Make embed code alternatives actually work
+* [cda] Improve extraction (#28709, #28937)
+* [twitter] Improve formats extraction from vmap URL (#28909)
+* [xtube] Fix formats extraction (#28870)
+* [svtplay] Improve extraction (#28507, #28876)
+* [tv2dk] Fix extraction (#28888)
+
+
+version 2021.04.26
+
+Extractors
++ [xfileshare] Add support for wolfstream.tv (#28858)
+* [francetvinfo] Improve video id extraction (#28792)
+* [medaltv] Fix extraction (#28807)
+* [tver] Redirect all downloads to Brightcove (#28849)
+* [go] Improve video id extraction (#25207, #25216, #26058)
+* [youtube] Fix lazy extractors (#28780)
++ [bbc] Extract description and timestamp from __INITIAL_DATA__ (#28774)
+* [cbsnews] Fix extraction for python <3.6 (#23359)
+
+
+version 2021.04.17
+
+Core
++ [utils] Add support for experimental HTTP response status code
+ 308 Permanent Redirect (#27877, #28768)
+
+Extractors
++ [lbry] Add support for HLS videos (#27877, #28768)
+* [youtube] Fix stretched ratio calculation
+* [youtube] Improve stretch extraction (#28769)
+* [youtube:tab] Improve grid extraction (#28725)
++ [youtube:tab] Detect series playlist on playlists page (#28723)
++ [youtube] Add more invidious instances (#28706)
+* [pluralsight] Extend anti-throttling timeout (#28712)
+* [youtube] Improve URL to extractor routing (#27572, #28335, #28742)
++ [maoritv] Add support for maoritelevision.com (#24552)
++ [youtube:tab] Pass innertube context and x-goog-visitor-id header along with
+ continuation requests (#28702)
+* [mtv] Fix Viacom A/B Testing Video Player extraction (#28703)
++ [pornhub] Extract DASH and HLS formats from get_media end point (#28698)
+* [cbssports] Fix extraction (#28682)
+* [jamendo] Fix track extraction (#28686)
+* [curiositystream] Fix format extraction (#26845, #28668)
+
+
+version 2021.04.07
+
+Core
+* [extractor/common] Use compat_cookies_SimpleCookie for _get_cookies
++ [compat] Introduce compat_cookies_SimpleCookie
+* [extractor/common] Improve JSON-LD author extraction
+* [extractor/common] Fix _get_cookies on python 2 (#20673, #23256, #20326,
+ #28640)
+
+Extractors
+* [youtube] Fix extraction of videos with restricted location (#28685)
++ [line] Add support for live.line.me (#17205, #28658)
+* [vimeo] Improve extraction (#28591)
+* [youku] Update ccode (#17852, #28447, #28460, #28648)
+* [youtube] Prefer direct entry metadata over entry metadata from playlist
+ (#28619, #28636)
+* [screencastomatic] Fix extraction (#11976, #24489)
++ [palcomp3] Add support for palcomp3.com (#13120)
++ [arnes] Add support for video.arnes.si (#28483)
++ [youtube:tab] Add support for hashtags (#28308)
+
+
+version 2021.04.01
+
+Extractors
+* [youtube] Setup CONSENT cookie when needed (#28604)
+* [vimeo] Fix password protected review extraction (#27591)
+* [youtube] Improve age-restricted video extraction (#28578)
+
+
+version 2021.03.31
+
+Extractors
+* [vlive] Fix inkey request (#28589)
+* [francetvinfo] Improve video id extraction (#28584)
++ [instagram] Extract duration (#28469)
+* [instagram] Improve title extraction (#28469)
++ [sbs] Add support for ondemand watch URLs (#28566)
+* [youtube] Fix video's channel extraction (#28562)
+* [picarto] Fix live stream extraction (#28532)
+* [vimeo] Fix unlisted video extraction (#28414)
+* [youtube:tab] Fix playlist/community continuation items extraction (#28266)
+* [ard] Improve clip id extraction (#22724, #28528)
+
+
+version 2021.03.25
+
+Extractors
++ [zoom] Add support for zoom.us (#16597, #27002, #28531)
+* [bbc] Fix BBC IPlayer Episodes/Group extraction (#28360)
+* [youtube] Fix default value for youtube_include_dash_manifest (#28523)
+* [zingmp3] Fix extraction (#11589, #16409, #16968, #27205)
++ [vgtv] Add support for new tv.aftonbladet.se URL schema (#28514)
++ [tiktok] Detect private videos (#28453)
+* [vimeo:album] Fix extraction for albums with number of videos multiple
+ to page size (#28486)
+* [vvvvid] Fix kenc format extraction (#28473)
+* [mlb] Fix video extraction (#21241)
+* [svtplay] Improve extraction (#28448)
+* [applepodcasts] Fix extraction (#28445)
+* [rtve] Improve extraction
+ + Extract all formats
+ * Fix RTVE Infantil extraction (#24851)
+ + Extract is_live and series
+
+
+version 2021.03.14
+
+Core
++ Introduce release_timestamp meta field (#28386)
+
+Extractors
++ [southpark] Add support for southparkstudios.com (#28413)
+* [southpark] Fix extraction (#26763, #28413)
+* [sportdeutschland] Fix extraction (#21856, #28425)
+* [pinterest] Reduce the number of HLS format requests
+* [peertube] Improve thumbnail extraction (#28419)
+* [tver] Improve title extraction (#28418)
+* [fujitv] Fix HLS formats extension (#28416)
+* [shahid] Fix format extraction (#28383)
++ [lbry] Add support for channel filters (#28385)
++ [bandcamp] Extract release timestamp
++ [lbry] Extract release timestamp (#28386)
+* [pornhub] Detect flagged videos
++ [pornhub] Extract formats from get_media end point (#28395)
+* [bilibili] Fix video info extraction (#28341)
++ [cbs] Add support for Paramount+ (#28342)
++ [trovo] Add Origin header to VOD formats (#28346)
+* [voxmedia] Fix volume embed extraction (#28338)
+
+
+version 2021.03.03
+
+Extractors
+* [youtube:tab] Switch continuation to browse API (#28289, #28327)
+* [9c9media] Fix extraction for videos with multiple ContentPackages (#28309)
++ [bbc] Add support for BBC Reel videos (#21870, #23660, #28268)
+
+
+version 2021.03.02
+
+Extractors
+* [zdf] Rework extractors (#11606, #13473, #17354, #21185, #26711, #27068,
+ #27930, #28198, #28199, #28274)
+ * Generalize cross-extractor video ids for zdf based extractors
+ * Improve extraction
+ * Fix 3sat and phoenix
+* [stretchinternet] Fix extraction (#28297)
+* [urplay] Fix episode data extraction (#28292)
++ [bandaichannel] Add support for b-ch.com (#21404)
+* [srgssr] Improve extraction (#14717, #14725, #27231, #28238)
+ + Extract subtitle
+ * Fix extraction for new videos
+ * Update srf download domains
+* [vvvvid] Reduce season request payload size
++ [vvvvid] Extract series sublists playlist title (#27601, #27618)
++ [dplay] Extract Ad-Free uplynk URLs (#28160)
++ [wat] Detect DRM protected videos (#27958)
+* [tf1] Improve extraction (#27980, #28040)
+* [tmz] Fix and improve extraction (#24603, #24687, #28211)
++ [gedidigital] Add support for Gedi group sites (#7347, #26946)
+* [youtube] Fix get_video_info request
+
+
+version 2021.02.22
+
+Core
++ [postprocessor/embedthumbnail] Recognize atomicparsley binary in lowercase
+ (#28112)
+
+Extractors
+* [apa] Fix and improve extraction (#27750)
++ [youporn] Extract duration (#28019)
++ [peertube] Add support for canard.tube (#28190)
+* [youtube] Fixup m4a_dash formats (#28165)
++ [samplefocus] Add support for samplefocus.com (#27763)
++ [vimeo] Add support for unlisted video source format extraction
+* [viki] Improve extraction (#26522, #28203)
+ * Extract uploader URL and episode number
+ * Report login required error
+ + Extract 480p formats
+ * Fix API v4 calls
+* [ninegag] Unescape title (#28201)
+* [youtube] Improve URL regular expression (#28193)
++ [youtube] Add support for redirect.invidious.io (#28193)
++ [dplay] Add support for de.hgtv.com (#28182)
++ [dplay] Add support for discoveryplus.com (#24698)
++ [simplecast] Add support for simplecast.com (#24107)
+* [youtube] Fix uploader extraction in flat playlist mode (#28045)
+* [yandexmusic:playlist] Request missing tracks in chunks (#27355, #28184)
++ [storyfire] Add support for storyfire.com (#25628, #26349)
++ [zhihu] Add support for zhihu.com (#28177)
+* [youtube] Fix controversial videos when authenticated with cookies (#28174)
+* [ccma] Fix timestamp parsing in python 2
++ [videopress] Add support for video.wordpress.com
+* [kakao] Improve info extraction and detect geo restriction (#26577)
+* [xboxclips] Fix extraction (#27151)
+* [ard] Improve formats extraction (#28155)
++ [canvas] Add support for dagelijksekost.een.be (#28119)
+
+
+version 2021.02.10
+
+Extractors
+* [youtube:tab] Improve grid continuation extraction (#28130)
+* [ign] Fix extraction (#24771)
++ [xhamster] Extract format filesize
++ [xhamster] Extract formats from xplayer settings (#28114)
++ [youtube] Add support phone/tablet JS player (#26424)
+* [archiveorg] Fix and improve extraction (#21330, #23586, #25277, #26780,
+ #27109, #27236, #28063)
++ [cda] Detect geo restricted videos (#28106)
+* [urplay] Fix extraction (#28073, #28074)
+* [youtube] Fix release date extraction (#28094)
++ [youtube] Extract abr and vbr (#28100)
+* [youtube] Skip OTF formats (#28070)
+
+
+version 2021.02.04.1
+
+Extractors
+* [youtube] Prefer DASH formats (#28070)
+* [azmedien] Fix extraction (#28064)
+
+
+version 2021.02.04
+
+Extractors
+* [pornhub] Implement lazy playlist extraction
+* [svtplay] Fix video id extraction (#28058)
++ [pornhub] Add support for authentication (#18797, #21416, #24294)
+* [pornhub:user] Improve paging
++ [pornhub:user] Add support for URLs unavailable via /videos page (#27853)
++ [bravotv] Add support for oxygen.com (#13357, #22500)
++ [youtube] Pass embed URL to get_video_info request
+* [ccma] Improve metadata extraction (#27994)
+ + Extract age limit, alt title, categories, series and episode number
+ * Fix timestamp multiple subtitles extraction
+* [egghead] Update API domain (#28038)
+- [vidzi] Remove extractor (#12629)
+* [vidio] Improve metadata extraction
+* [youtube] Improve subtitles extraction
+* [youtube] Fix chapter extraction fallback
+* [youtube] Rewrite extractor
+ * Improve format sorting
+ * Remove unused code
+ * Fix series metadata extraction
+ * Fix trailer video extraction
+ * Improve error reporting
+ + Extract video location
++ [vvvvid] Add support for youtube embeds (#27825)
+* [googledrive] Report download page errors (#28005)
+* [vlive] Fix error message decoding for python 2 (#28004)
+* [youtube] Improve DASH formats file size extraction
+* [cda] Improve birth validation detection (#14022, #27929)
++ [awaan] Extract uploader id (#27963)
++ [medialaan] Add support for DPG Media MyChannels based websites (#14871, #15597,
+ #16106, #16489)
+* [abcnews] Fix extraction (#12394, #27920)
+* [AMP] Fix upload date and timestamp extraction (#27970)
+* [tv4] Relax URL regular expression (#27964)
++ [tv2] Add support for mtvuutiset.fi (#27744)
+* [adn] Improve login warning reporting
+* [zype] Fix uplynk id extraction (#27956)
++ [adn] Add support for authentication (#17091, #27841, #27937)
+
+
+version 2021.01.24.1
+
+Core
+* Introduce --output-na-placeholder (#27896)
+
+Extractors
+* [franceculture] Make thumbnail optional (#18807)
+* [franceculture] Fix extraction (#27891, #27903)
+* [njpwworld] Fix extraction (#27890)
+* [comedycentral] Fix extraction (#27905)
+* [wat] Fix format extraction (#27901)
++ [americastestkitchen:season] Add support for seasons (#27861)
++ [trovo] Add support for trovo.live (#26125)
++ [aol] Add support for yahoo videos (#26650)
+* [yahoo] Fix single video extraction
+* [lbry] Unescape lbry URI (#27872)
+* [9gag] Fix and improve extraction (#23022)
+* [americastestkitchen] Improve metadata extraction for ATK episodes (#27860)
+* [aljazeera] Fix extraction (#20911, #27779)
++ [minds] Add support for minds.com (#17934)
+* [ard] Fix title and description extraction (#27761)
++ [spotify] Add support for Spotify Podcasts (#27443)
+
+
+version 2021.01.16
+
+Core
+* [YoutubeDL] Protect from infinite recursion due to recursively nested
+ playlists (#27833)
+* [YoutubeDL] Ignore failure to create existing directory (#27811)
+* [YoutubeDL] Raise syntax error for format selection expressions with multiple
+ + operators (#27803)
+
+Extractors
++ [animeondemand] Add support for lazy playlist extraction (#27829)
+* [youporn] Restrict fallback download URL (#27822)
+* [youporn] Improve height and tbr extraction (#20425, #23659)
+* [youporn] Fix extraction (#27822)
++ [twitter] Add support for unified cards (#27826)
++ [twitch] Add Authorization header with OAuth token for GraphQL requests
+ (#27790)
+* [mixcloud:playlist:base] Extract video id in flat playlist mode (#27787)
+* [cspan] Improve info extraction (#27791)
+* [adn] Improve info extraction
+* [adn] Fix extraction (#26963, #27732)
+* [youtube:search] Extract from all sections (#27604)
+* [youtube:search] Fix view count extraction and try to extract all video sections (#27604)
+* [twitch] Improve login error extraction
+* [twitch] Fix authentication (#27743)
+* [3qsdn] Improve extraction (#21058)
+* [peertube] Extract formats from streamingPlaylists (#26002, #27586, #27728)
+* [khanacademy] Fix extraction (#2887, #26803)
+* [spike] Update Paramount Network feed URL (#27715)
+
+
+version 2021.01.08
+
+Core
+* [downloader/hls] Disable decryption in tests (#27660)
++ [utils] Add a function to clean podcast URLs
+
+Extractors
+* [rai] Improve subtitles extraction (#27698, #27705)
+* [canvas] Match only supported VRT NU URLs (#27707)
++ [bibeltv] Add support for bibeltv.de (#14361)
++ [bfmtv] Add support for bfmtv.com (#16053, #26615)
++ [sbs] Add support for ondemand play and news embed URLs (#17650, #27629)
+* [twitch] Drop legacy kraken API v5 code altogether and refactor
+* [twitch:vod] Switch to GraphQL for video metadata
+* [canvas] Fix VRT NU extraction (#26957, #27053)
+* [twitch] Switch access token to GraphQL and refactor (#27646)
++ [rai] Detect ContentItem in iframe (#12652, #27673)
+* [ketnet] Fix extraction (#27662)
++ [dplay] Add support for Discovery+ domains (#27680)
+* [motherless] Improve extraction (#26495, #27450)
+* [motherless] Fix recent videos upload date extraction (#27661)
+* [nrk] Fix extraction for videos without a legalAge rating
+- [googleplus] Remove extractor (#4955, #7400)
++ [applepodcasts] Add support for podcasts.apple.com (#25918)
++ [googlepodcasts] Add support for podcasts.google.com
++ [iheart] Add support for iheart.com (#27037)
+* [acast] Clean podcast URLs
+* [stitcher] Clean podcast URLs
++ [xfileshare] Add support for aparat.cam (#27651)
++ [twitter] Add support for summary card (#25121)
+* [twitter] Try to use a Generic fallback for unknown twitter cards (#25982)
++ [stitcher] Add support for shows and show metadata extraction (#20510)
+* [stv] Improve episode id extraction (#23083)
+
+
+version 2021.01.03
+
+Extractors
+* [nrk] Improve series metadata extraction (#27473)
++ [nrk] Extract subtitles
+* [nrk] Fix age limit extraction
+* [nrk] Improve video id extraction
++ [nrk] Add support for podcasts (#27634, #27635)
+* [nrk] Generalize and delegate all item extractors to nrk
++ [nrk] Add support for mp3 formats
+* [nrktv] Switch to playback endpoint
+* [vvvvid] Fix season metadata extraction (#18130)
+* [stitcher] Fix extraction (#20811, #27606)
+* [acast] Fix extraction (#21444, #27612, #27613)
++ [arcpublishing] Add support for arcpublishing.com (#2298, #9340, #17200)
++ [sky] Add support for Sports News articles and Brighcove videos (#13054)
++ [vvvvid] Extract akamai formats
+* [vvvvid] Skip unplayable episodes (#27599)
+* [yandexvideo] Fix extraction for Python 3.4
+
+
+version 2020.12.31
+
+Core
+* [utils] Accept only supported protocols in url_or_none
+* [YoutubeDL] Allow format filtering using audio language (#16209)
+
+Extractors
++ [redditr] Extract all thumbnails (#27503)
+* [vvvvid] Improve info extraction
++ [vvvvid] Add support for playlists (#18130, #27574)
++ [yandexdisk] Extract info from webpage
+* [yandexdisk] Fix extraction (#17861, #27131)
+* [yandexvideo] Use old API call as fallback
+* [yandexvideo] Fix extraction (#25000)
+- [nbc] Remove CSNNE extractor
+* [nbc] Fix NBCSport VPlayer URL extraction (#16640)
++ [aenetworks] Add support for biography.com (#3863)
+* [uktvplay] Match new video URLs (#17909)
+* [sevenplay] Detect API errors
+* [tenplay] Fix format extraction (#26653)
+* [brightcove] Raise error for DRM protected videos (#23467, #27568)
+
+
+version 2020.12.29
+
+Extractors
+* [youtube] Improve yt initial data extraction (#27524)
+* [youtube:tab] Improve URL matching (#27559)
+* [youtube:tab] Restore retry on browse requests (#27313, #27564)
+* [aparat] Fix extraction (#22285, #22611, #23348, #24354, #24591, #24904,
+ #25418, #26070, #26350, #26738, #27563)
+- [brightcove] Remove sonyliv specific code
+* [piksel] Improve format extraction
++ [zype] Add support for uplynk videos
++ [toggle] Add support for live.mewatch.sg (#27555)
++ [go] Add support for fxnow.fxnetworks.com (#13972, #22467, #23754, #26826)
+* [teachable] Improve embed detection (#26923)
+* [mitele] Fix free video extraction (#24624, #25827, #26757)
+* [telecinco] Fix extraction
+* [youtube] Update invidious.snopyta.org (#22667)
+* [amcnetworks] Improve auth only video detection (#27548)
++ [generic] Add support for VHX Embeds (#27546)
+
+
+version 2020.12.26
+
+Extractors
+* [instagram] Fix comment count extraction
++ [instagram] Add support for reel URLs (#26234, #26250)
+* [bbc] Switch to media selector v6 (#23232, #23933, #26303, #26432, #26821,
+ #27538)
+* [instagram] Improve thumbnail extraction
+* [instagram] Fix extraction when authenticated (#22880, #26377, #26981,
+ #27422)
+* [spankbang:playlist] Fix extraction (#24087)
++ [spankbang] Add support for playlist videos
+* [pornhub] Improve like and dislike count extraction (#27356)
+* [pornhub] Fix lq formats extraction (#27386, #27393)
++ [bongacams] Add support for bongacams.com (#27440)
+* [youtube:tab] Extend URL regular expression (#27501)
+* [theweatherchannel] Fix extraction (#25930, #26051)
++ [sprout] Add support for Universal Kids (#22518)
+* [theplatform] Allow passing geo bypass countries from other extractors
++ [wistia] Add support for playlists (#27533)
++ [ctv] Add support for ctv.ca (#27525)
+* [9c9media] Improve info extraction
+* [youtube] Fix automatic captions extraction (#27162, #27388)
+* [sonyliv] Fix title for movies
+* [sonyliv] Fix extraction (#25667)
+* [streetvoice] Fix extraction (#27455, #27492)
++ [facebook] Add support for watchparty pages (#27507)
+* [cbslocal] Fix video extraction
++ [brightcove] Add another method to extract policyKey
+* [mewatch] Relax URL regular expression (#27506)
+
+
+version 2020.12.22
+
+Core
+* [common] Remove unwanted query params from unsigned akamai manifest URLs
+
+Extractors
+- [tastytrade] Remove extractor (#25716)
+* [niconico] Fix playlist extraction (#27428)
+- [everyonesmixtape] Remove extractor
+- [kanalplay] Remove extractor
+* [arkena] Fix extraction
+* [nba] Rewrite extractor
+* [turner] Improve info extraction
+* [youtube] Improve xsrf token extraction (#27442)
+* [generic] Improve RSS age limit extraction
+* [generic] Fix RSS itunes thumbnail extraction (#27405)
++ [redditr] Extract duration (#27426)
+- [zaq1] Remove extractor
++ [asiancrush] Add support for retrocrush.tv
+* [asiancrush] Fix extraction
+- [noco] Remove extractor (#10864)
+* [nfl] Fix extraction (#22245)
+* [skysports] Relax URL regular expression (#27435)
++ [tv5unis] Add support for tv5unis.ca (#22399, #24890)
++ [videomore] Add support for more.tv (#27088)
++ [yandexmusic] Add support for music.yandex.com (#27425)
++ [nhk:program] Add support for audio programs and program clips
++ [nhk] Add support for NHK video programs (#27230)
+
+
+version 2020.12.14
+
+Core
+* [extractor/common] Improve JSON-LD interaction statistic extraction (#23306)
+* [downloader/hls] Delegate manifests with media initialization to ffmpeg
++ [extractor/common] Document duration meta field for playlists
+
+Extractors
+* [mdr] Bypass geo restriction
+* [mdr] Improve extraction (#24346, #26873)
+* [yandexmusic:album] Improve album title extraction (#27418)
+* [eporner] Fix view count extraction and make optional (#23306)
++ [eporner] Extend URL regular expression
+* [eporner] Fix hash extraction and extend _VALID_URL (#27396)
+* [slideslive] Use m3u8 entry protocol for m3u8 formats (#27400)
+* [twitcasting] Fix format extraction and improve info extraction (#24868)
+* [linuxacademy] Fix authentication and extraction (#21129, #26223, #27402)
+* [itv] Clean description from HTML tags (#27399)
+* [vlive] Sort live formats (#27404)
+* [hotstar] Fix and improve extraction
+ * Fix format extraction (#26690)
+ + Extract thumbnail URL (#16079, #20412)
+ + Add support for country specific playlist URLs (#23496)
+ * Select the last id in video URL (#26412)
++ [youtube] Add some invidious instances (#27373)
+
+
+version 2020.12.12
+
+Core
+* [YoutubeDL] Improve thumbnail filename deducing (#26010, #27244)
+
+Extractors
++ [ruutu] Extract more metadata
++ [ruutu] Detect non-free videos (#21154)
+* [ruutu] Authenticate format URLs (#21031, #26782)
++ [ruutu] Add support for static.nelonenmedia.fi (#25412)
++ [ruutu] Extend URL regular expression (#24839)
++ [facebook] Add support for archived live video URLs (#15859)
+* [wdr] Improve overall extraction
++ [wdr] Extend subtitles extraction (#22672, #22723)
++ [facebook] Add support for videos attached to Relay based story pages
+ (#10795)
++ [wdr:page] Add support for kinder.wdr.de (#27350)
++ [facebook] Add another regular expression for handleServerJS
+* [facebook] Fix embed page extraction
++ [facebook] Add support for Relay post pages (#26935)
++ [facebook] Add support for watch videos (#22795, #27062)
++ [facebook] Add support for group posts with multiple videos (#19131)
+* [itv] Fix series metadata extraction (#26897)
+- [itv] Remove old extraction method (#23177)
+* [facebook] Redirect mobile URLs to desktop URLs (#24831, #25624)
++ [facebook] Add support for Relay based pages (#26823)
+* [facebook] Try to reduce unnecessary tahoe requests
+- [facebook] Remove hardcoded Chrome User-Agent (#18974, #25411, #26958,
+ #27329)
+- [smotri] Remove extractor (#27358)
+- [beampro] Remove extractor (#17290, #22871, #23020, #23061, #26099)
+
+
+version 2020.12.09
+
+Core
+* [extractor/common] Fix inline HTML5 media tags processing (#27345)
+
+Extractors
+* [youtube:tab] Improve identity token extraction (#27197)
+* [youtube:tab] Make click tracking params on continuation optional
+* [youtube:tab] Delegate inline playlists to tab-based playlists (#27298)
++ [tubitv] Extract release year (#27317)
+* [amcnetworks] Fix free content extraction (#20354)
++ [lbry:channel] Add support for channels (#25584)
++ [lbry] Add support for short and embed URLs
+* [lbry] Fix channel metadata extraction
++ [telequebec] Add support for video.telequebec.tv (#27339)
+* [telequebec] Fix extraction (#25733, #26883)
++ [youtube:tab] Capture and output alerts (#27340)
+* [tvplay:home] Fix extraction (#21153)
+* [americastestkitchen] Fix extraction and add support
+ for Cook's Country and Cook's Illustrated (#17234, #27322)
++ [slideslive] Add support for yoda service videos and extract subtitles
+ (#27323)
+
+
+version 2020.12.07
+
+Core
+* [extractor/common] Extract timestamp from Last-Modified header
++ [extractor/common] Add support for dl8-* media tags (#27283)
+* [extractor/common] Fix media type extraction for HTML5 media tags
+ in start/end form
+
+Extractors
+* [aenetworks] Fix extraction (#23363, #23390, #26795, #26985)
+ * Fix Fastly format extraction
+ + Add support for play and watch subdomains
+ + Extract series metadata
+* [youtube] Improve youtu.be extraction in non-existing playlists (#27324)
++ [generic] Extract RSS video description, timestamp and itunes metadata
+ (#27177)
+* [nrk] Reduce the number of instalments and episodes requests
+* [nrk] Improve extraction
+ * Improve format extraction for old akamai formats
+ + Add is_live value to entry info dict
+ * Request instalments only when available
+ * Fix skole extraction
++ [peertube] Extract fps
++ [peertube] Recognize audio-only formats (#27295)
+
+
+version 2020.12.05
+
+Core
+* [extractor/common] Improve Akamai HTTP format extraction
+ * Allow m3u8 manifest without an additional audio format
+ * Fix extraction for qualities starting with a number
+
+Extractors
+* [teachable:course] Improve extraction (#24507, #27286)
+* [nrk] Improve error extraction
+* [nrktv:series] Improve extraction (#21926)
+* [nrktv:season] Improve extraction
+* [nrk] Improve format extraction and geo-restriction detection (#24221)
+* [pornhub] Handle HTTP errors gracefully (#26414)
+* [nrktv] Relax URL regular expression (#27299, #26185)
++ [zdf] Extract webm formats (#26659)
++ [gamespot] Extract DASH and HTTP formats
++ [tver] Add support for tver.jp (#26662, #27284)
++ [pornhub] Add support for pornhub.org (#27276)
+
+
+version 2020.12.02
+
+Extractors
++ [tva] Add support for qub.ca (#27235)
++ [toggle] Detect DRM protected videos (#16479, #20805)
++ [toggle] Add support for new MeWatch URLs (#27256)
+* [youtube:tab] Extract channels only from channels tab (#27266)
++ [cspan] Extract info from jwplayer data (#3672, #3734, #10638, #13030,
+ #18806, #23148, #24461, #26171, #26800, #27263)
+* [cspan] Pass Referer header with format's video URL (#26032, #25729)
+* [youtube] Improve age-gated videos extraction (#27259)
++ [mediaset] Add support for movie URLs (#27240)
+* [yandexmusic] Refactor
++ [yandexmusic] Add support for artist's tracks and albums (#11887, #22284)
+* [yandexmusic:track] Fix extraction (#26449, #26669, #26747, #26748, #26762)
+
+
+version 2020.11.29
+
+Core
+* [YoutubeDL] Write static debug to stderr and respect quiet for dynamic debug
+ (#14579, #22593)
+
+Extractors
+* [drtv] Extend URL regular expression (#27243)
+* [tiktok] Fix extraction (#20809, #22838, #22850, #25987, #26281, #26411,
+ #26639, #26776, #27237)
++ [ina] Add support for mobile URLs (#27229)
+* [pornhub] Fix like and dislike count extraction (#27227, #27234)
+* [youtube] Improve yt initial player response extraction (#27216)
+* [videa] Fix extraction (#25650, #25973, #26301)
+
+
+version 2020.11.26
+
+Core
+* [downloader/fragment] Set final file's mtime according to last fragment's
+ Last-Modified header (#11718, #18384, #27138)
+
+Extractors
++ [spreaker] Add support for spreaker.com (#13480, #13877)
+* [vlive] Improve extraction for geo-restricted videos
++ [vlive] Add support for post URLs (#27122, #27123)
+* [viki] Fix video API request (#27184)
+* [bbc] Fix BBC Three clip extraction
+* [bbc] Fix BBC News videos extraction
++ [medaltv] Add support for medal.tv (#27149)
+* [youtube] Improve music metadata and license extraction (#26013)
+* [nrk] Fix extraction
+* [cda] Fix extraction (#17803, #24458, #24518, #26381)
+
+
+version 2020.11.24
+
+Core
++ [extractor/common] Add generic support for akamai HTTP format extraction
+
+Extractors
+* [youtube:tab] Fix feeds extraction (#25695, #26452)
+* [youtube:favorites] Restore extractor
+* [youtube:tab] Fix some weird typo (#27157)
++ [pinterest] Add support for large collections (more than 25 pins)
++ [franceinter] Extract thumbnail (#27153)
++ [box] Add support for box.com (#5949)
++ [nytimes] Add support for cooking.nytimes.com (#27112, #27143)
+* [lbry] Relax URL regular expression (#27144)
++ [rumble] Add support for embed pages (#10785)
++ [skyit] Add support for multiple Sky Italia websites (#26629)
++ [pinterest] Add support for pinterest.com (#25747)
+
+
+version 2020.11.21.1
+
+Core
+* [downloader/http] Fix crash during urlopen caused by missing reason
+ of URLError
+* [YoutubeDL] Fix --ignore-errors for playlists with generator-based entries
+ of url_transparent (#27064)
+
+Extractors
++ [svtplay] Add support for svt.se/barnkanalen (#24817)
++ [svt] Extract timestamp (#27130)
+* [svtplay] Improve thumbnail extraction (#27130)
+* [youtube] Fix error reason extraction (#27081)
+* [youtube] Fix like and dislike count extraction (#25977)
++ [youtube:tab] Add support for current video and fix lives extraction (#27126)
+* [infoq] Fix format extraction (#25984)
+* [francetv] Update to fix thumbnail URL issue (#27120)
+* [youtube] Improve yt initial data extraction (#27093)
++ [discoverynetworks] Add support new TLC/DMAX URLs (#27100)
+* [rai] Fix protocol relative relinker URLs (#22766)
+* [rai] Fix unavailable video format detection
+* [rai] Improve extraction
+* [rai] Fix extraction (#27077)
+* [viki] Improve format extraction
+* [viki] Fix stream extraction from MPD (#27092)
+* [googledrive] Fix format extraction (#26979)
++ [amara] Add support for amara.org (#20618)
+* [vimeo:album] Fix extraction (#27079)
+* [mtv] Fix mgid extraction (#26841)
+
+
+version 2020.11.19
+
+Core
+* [extractor/common] Output error for invalid URLs in _is_valid_url (#21400,
+ #24151, #25617, #25618, #25586, #26068, #27072)
+
+Extractors
+* [youporn] Fix upload date extraction
+* [youporn] Make comment count optional (#26986)
+* [arte] Rework extractors
+ * Reimplement embed and playlist extractors to delegate to the single
+ entrypoint artetv extractor
+ * Improve embeds detection (#27057)
++ [arte] Extract m3u8 formats (#27061)
+* [mgtv] Fix format extraction (#26415)
++ [lbry] Add support for odysee.com (#26806)
+* [francetv] Improve info extraction
++ [francetv] Add fallback video URL extraction (#27047)
+
+
+version 2020.11.18
+
+Extractors
+* [spiegel] Fix extraction (#24206, #24767)
+* [youtube] Improve extraction
+ + Add support for --no-playlist (#27009)
+ * Improve playlist and mix extraction (#26390, #26509, #26534, #27011)
+ + Extract playlist uploader data
+* [youtube:tab] Fix view count extraction (#27051)
+* [malltv] Fix extraction (#27035)
++ [bandcamp] Extract playlist description (#22684)
+* [urplay] Fix extraction (#26828)
+* [youtube:tab] Fix playlist title extraction (#27015)
+* [youtube] Fix chapters extraction (#26005)
+
+
+version 2020.11.17
+
+Core
+* [utils] Skip ! prefixed code in js_to_json
+
+Extractors
+* [youtube:tab] Fix extraction with cookies provided (#27005)
+* [lrt] Fix extraction with empty tags (#20264)
++ [ndr:embed:base] Extract subtitles (#25447, #26106)
++ [servus] Add support for pm-wissen.com (#25869)
+* [servus] Fix extraction (#26872, #26967, #26983, #27000)
+* [xtube] Fix extraction (#26996)
+* [lrt] Fix extraction
++ [lbry] Add support for lbry.tv
++ [condenast] Extract subtitles
+* [condenast] Fix extraction
+* [bandcamp] Fix extraction (#26681, #26684)
+* [rai] Fix RaiPlay extraction (#26064, #26096)
+* [vlive] Fix extraction
+* [usanetwork] Fix extraction
+* [nbc] Fix NBCNews/Today/MSNBC extraction
+* [cnbc] Fix extraction
+
+
+version 2020.11.12
+
+Extractors
+* [youtube] Rework extractors
+
+
+version 2020.11.01
+
+Core
+* [utils] Don't attempt to coerce JS strings to numbers in js_to_json (#26851)
+* [downloader/http] Properly handle missing message in SSLError (#26646)
+* [downloader/http] Fix access to not yet opened stream in retry
+
+Extractors
+* [youtube] Fix JS player URL extraction
+* [ytsearch] Fix extraction (#26920)
+* [afreecatv] Fix typo (#26970)
+* [23video] Relax URL regular expression (#26870)
++ [ustream] Add support for video.ibm.com (#26894)
+* [iqiyi] Fix typo (#26884)
++ [expressen] Add support for di.se (#26670)
+* [iprima] Improve video id extraction (#26507, #26494)
+
+
+version 2020.09.20
+
+Core
+* [extractor/common] Relax interaction count extraction in _json_ld
++ [extractor/common] Extract author as uploader for VideoObject in _json_ld
+* [downloader/hls] Fix incorrect end byte in Range HTTP header for
+ media segments with EXT-X-BYTERANGE (#14748, #24512)
+* [extractor/common] Handle ssl.CertificateError in _request_webpage (#26601)
+* [downloader/http] Improve timeout detection when reading block of data
+ (#10935)
+* [downloader/http] Retry download when urlopen times out (#10935, #26603)
+
+Extractors
+* [redtube] Extend URL regular expression (#26506)
+* [twitch] Refactor
+* [twitch:stream] Switch to GraphQL and fix reruns (#26535)
++ [telequebec] Add support for brightcove videos (#25833)
+* [pornhub] Extract metadata from JSON-LD (#26614)
+* [pornhub] Fix view count extraction (#26621, #26614)
+
+
+version 2020.09.14
+
+Core
++ [postprocessor/embedthumbnail] Add support for non jpg/png thumbnails
+ (#25687, #25717)
+
+Extractors
+* [rtlnl] Extend URL regular expression (#26549, #25821)
+* [youtube] Fix empty description extraction (#26575, #26006)
+* [srgssr] Extend URL regular expression (#26555, #26556, #26578)
+* [googledrive] Use redirect URLs for source format (#18877, #23919, #24689,
+ #26565)
+* [svtplay] Fix id extraction (#26576)
+* [redbulltv] Improve support for redbull.com TV localized URLs (#22063)
++ [redbulltv] Add support for new redbull.com TV URLs (#22037, #22063)
+* [soundcloud:pagedplaylist] Reduce pagination limit (#26557)
+
+
+version 2020.09.06
+
+Core
++ [utils] Recognize wav mimetype (#26463)
+
+Extractors
+* [nrktv:episode] Improve video id extraction (#25594, #26369, #26409)
+* [youtube] Fix age gate content detection (#26100, #26152, #26311, #26384)
+* [youtube:user] Extend URL regular expression (#26443)
+* [xhamster] Improve initials regular expression (#26526, #26353)
+* [svtplay] Fix video id extraction (#26425, #26428, #26438)
+* [twitch] Rework extractors (#12297, #20414, #20604, #21811, #21812, #22979,
+ #24263, #25010, #25553, #25606)
+ * Switch to GraphQL
+ + Add support for collections
+ + Add support for clips and collections playlists
+* [biqle] Improve video ext extraction
+* [xhamster] Fix extraction (#26157, #26254)
+* [xhamster] Extend URL regular expression (#25789, #25804, #25927)
+
+
+version 2020.07.28
+
+Extractors
+* [youtube] Fix sigfunc name extraction (#26134, #26135, #26136, #26137)
+* [youtube] Improve description extraction (#25937, #25980)
+* [wistia] Restrict embed regular expression (#25969)
+* [youtube] Prevent excess HTTP 301 (#25786)
++ [youtube:playlists] Extend URL regular expression (#25810)
++ [bellmedia] Add support for cp24.com clip URLs (#25764)
+* [brightcove] Improve embed detection (#25674)
+
+
+version 2020.06.16.1
+
+Extractors
+* [youtube] Force old layout (#25682, #25683, #25680, #25686)
+* [youtube] Fix categories and improve tags extraction
+
+
+version 2020.06.16
+
+Extractors
+* [youtube] Fix uploader id and uploader URL extraction
+* [youtube] Improve view count extraction
+* [youtube] Fix upload date extraction (#25677)
+* [youtube] Fix thumbnails extraction (#25676)
+* [youtube] Fix playlist and feed extraction (#25675)
++ [facebook] Add support for single-video ID links
++ [youtube] Extract chapters from JSON (#24819)
++ [kaltura] Add support for multiple embeds on a webpage (#25523)
+
+
+version 2020.06.06
+
+Extractors
+* [tele5] Bypass geo restriction
++ [jwplatform] Add support for bypass geo restriction
+* [tele5] Prefer jwplatform over nexx (#25533)
+* [twitch:stream] Expect 400 and 410 HTTP errors from API
+* [twitch:stream] Fix extraction (#25528)
+* [twitch] Fix thumbnails extraction (#25531)
++ [twitch] Pass v5 Accept HTTP header (#25531)
+* [brightcove] Fix subtitles extraction (#25540)
++ [malltv] Add support for sk.mall.tv (#25445)
+* [periscope] Fix untitled broadcasts (#25482)
+* [jwplatform] Improve embeds extraction (#25467)
+
+
+version 2020.05.29
+
+Core
+* [postprocessor/ffmpeg] Embed series metadata with --add-metadata
+* [utils] Fix file permissions in write_json_file (#12471, #25122)
+
+Extractors
+* [ard:beta] Extend URL regular expression (#25405)
++ [youtube] Add support for more invidious instances (#25417)
+* [giantbomb] Extend URL regular expression (#25222)
+* [ard] Improve URL regular expression (#25134, #25198)
+* [redtube] Improve formats extraction and extract m3u8 formats (#25311,
+ #25321)
+* [indavideo] Switch to HTTPS for API request (#25191)
+* [redtube] Improve title extraction (#25208)
+* [vimeo] Improve format extraction and sorting (#25285)
+* [soundcloud] Reduce API playlist page limit (#25274)
++ [youtube] Add support for yewtu.be (#25226)
+* [mailru] Fix extraction (#24530, #25239)
+* [bellator] Fix mgid extraction (#25195)
+
+
+version 2020.05.08
+
+Core
+* [downloader/http] Request last data block of exact remaining size
+* [downloader/http] Finish downloading once received data length matches
+ expected
+* [extractor/common] Use compat_cookiejar_Cookie for _set_cookie to always
+ ensure cookie name and value are bytestrings on python 2 (#23256, #24776)
++ [compat] Introduce compat_cookiejar_Cookie
+* [utils] Improve cookie files support
+ + Add support for UTF-8 in cookie files
+ * Skip malformed cookie file entries instead of crashing (invalid entry
+ length, invalid expires at)
+
+Extractors
+* [youtube] Improve signature cipher extraction (#25187, #25188)
+* [iprima] Improve extraction (#25138)
+* [uol] Fix extraction (#22007)
++ [orf] Add support for more radio stations (#24938, #24968)
+* [dailymotion] Fix typo
+- [puhutv] Remove no longer available HTTP formats (#25124)
+
+
+version 2020.05.03
+
+Core
++ [extractor/common] Extract multiple JSON-LD entries
+* [options] Clarify doc on --exec command (#19087, #24883)
+* [extractor/common] Skip malformed ISM manifest XMLs while extracting
+ ISM formats (#24667)
+
+Extractors
+* [crunchyroll] Fix and improve extraction (#25096, #25060)
+* [youtube] Improve player id extraction
+* [youtube] Use redirected video id if any (#25063)
+* [yahoo] Fix GYAO Player extraction and relax URL regular expression
+ (#24178, #24778)
+* [tvplay] Fix Viafree extraction (#15189, #24473, #24789)
+* [tenplay] Relax URL regular expression (#25001)
++ [prosiebensat1] Extract series metadata
+* [prosiebensat1] Improve extraction and remove 7tv.de support (#24948)
+- [prosiebensat1] Remove 7tv.de support (#24948)
+* [youtube] Fix DRM videos detection (#24736)
+* [thisoldhouse] Fix video id extraction (#24548, #24549)
++ [soundcloud] Extract AAC format (#19173, #24708)
+* [youtube] Skip broken multifeed videos (#24711)
+* [nova:embed] Fix extraction (#24700)
+* [motherless] Fix extraction (#24699)
+* [twitch:clips] Extend URL regular expression (#24290, #24642)
+* [tv4] Fix ISM formats extraction (#24667)
+* [tele5] Fix extraction (#24553)
++ [mofosex] Add support for generic embeds (#24633)
++ [youporn] Add support for generic embeds
++ [spankwire] Add support for generic embeds (#24633)
+* [spankwire] Fix extraction (#18924, #20648)
+
+
+version 2020.03.24
+
+Core
+- [utils] Revert support for cookie files with spaces used instead of tabs
+
+Extractors
+* [teachable] Update upskillcourses and gns3 domains
+* [generic] Look for teachable embeds before wistia
++ [teachable] Extract chapter metadata (#24421)
++ [bilibili] Add support for player.bilibili.com (#24402)
++ [bilibili] Add support for new URL schema with BV ids (#24439, #24442)
+* [limelight] Remove disabled API requests (#24255)
+* [soundcloud] Fix download URL extraction (#24394)
++ [cbc:watch] Add support for authentication (#19160)
+* [hellporno] Fix extraction (#24399)
+* [xtube] Fix formats extraction (#24348)
+* [ndr] Fix extraction (#24326)
+* [nhk] Update m3u8 URL and use native HLS downloader (#24329)
+- [nhk] Remove obsolete rtmp formats (#24329)
+* [nhk] Relax URL regular expression (#24329)
+- [vimeo] Revert fix showcase password protected video extraction (#24224)
+
+
+version 2020.03.08
+
+Core
++ [utils] Add support for cookie files with spaces used instead of tabs
+
+Extractors
++ [pornhub] Add support for pornhubpremium.com (#24288)
+- [youtube] Remove outdated code and unnecessary requests
+* [youtube] Improve extraction in 429 HTTP error conditions (#24283)
+* [nhk] Update API version (#24270)
+
+
+version 2020.03.06
+
+Extractors
+* [youtube] Fix age-gated videos support without login (#24248)
+* [vimeo] Fix showcase password protected video extraction (#24224)
+* [pornhub] Improve title extraction (#24184)
+* [peertube] Improve extraction (#23657)
++ [servus] Add support for new URL schema (#23475, #23583, #24142)
+* [vimeo] Fix subtitles URLs (#24209)
+
+
+version 2020.03.01
+
+Core
+* [YoutubeDL] Force redirect URL to unicode on python 2
+- [options] Remove duplicate short option -v for --version (#24162)
+
+Extractors
+* [xhamster] Fix extraction (#24205)
+* [franceculture] Fix extraction (#24204)
++ [telecinco] Add support for article opening videos
+* [telecinco] Fix extraction (#24195)
+* [xtube] Fix metadata extraction (#21073, #22455)
+* [youjizz] Fix extraction (#24181)
+- Remove no longer needed compat_str around geturl
+* [pornhd] Fix extraction (#24128)
++ [teachable] Add support for multiple videos per lecture (#24101)
++ [wistia] Add support for multiple generic embeds (#8347, #11385)
+* [imdb] Fix extraction (#23443)
+* [tv2dk:bornholm:play] Fix extraction (#24076)
+
+
+version 2020.02.16
+
+Core
+* [YoutubeDL] Fix playlist entry indexing with --playlist-items (#10591,
+ #10622)
+* [update] Fix updating via symlinks (#23991)
++ [compat] Introduce compat_realpath (#23991)
+
+Extractors
++ [npr] Add support for streams (#24042)
++ [24video] Add support for porn.24video.net (#23779, #23784)
+- [jpopsuki] Remove extractor (#23858)
+* [nova] Improve extraction (#23690)
+* [nova:embed] Improve (#23690)
+* [nova:embed] Fix extraction (#23672)
++ [abc:iview] Add support for 720p (#22907, #22921)
+* [nytimes] Improve format sorting (#24010)
++ [toggle] Add support for mewatch.sg (#23895, #23930)
+* [thisoldhouse] Fix extraction (#23951)
++ [popcorntimes] Add support for popcorntimes.tv (#23949)
+* [sportdeutschland] Update to new API
+* [twitch:stream] Lowercase channel id for stream request (#23917)
+* [tv5mondeplus] Fix extraction (#23907, #23911)
+* [tva] Relax URL regular expression (#23903)
+* [vimeo] Fix album extraction (#23864)
+* [viewlift] Improve extraction
+ * Fix extraction (#23851)
+ + Add support for authentication
+ + Add support for more domains
+* [svt] Fix series extraction (#22297)
+* [svt] Fix article extraction (#22897, #22919)
+* [soundcloud] Improve private playlist/set tracks extraction (#3707)
+
+
+version 2020.01.24
+
+Extractors
+* [youtube] Fix sigfunc name extraction (#23819)
+* [stretchinternet] Fix extraction (#4319)
+* [voicerepublic] Fix extraction
+* [azmedien] Fix extraction (#23783)
+* [businessinsider] Fix jwplatform id extraction (#22929, #22954)
++ [24video] Add support for 24video.vip (#23753)
+* [ivi:compilation] Fix entries extraction (#23770)
+* [ard] Improve extraction (#23761)
+ * Simplify extraction
+ + Extract age limit and series
+ * Bypass geo-restriction
++ [nbc] Add support for nbc multi network URLs (#23049)
+* [americastestkitchen] Fix extraction
+* [zype] Improve extraction
+ + Extract subtitles (#21258)
+ + Support URLs with alternative keys/tokens (#21258)
+ + Extract more metadata
+* [orf:tvthek] Improve geo restricted videos detection (#23741)
+* [soundcloud] Restore previews extraction (#23739)
+
+
+version 2020.01.15
+
+Extractors
+* [yourporn] Fix extraction (#21645, #22255, #23459)
++ [canvas] Add support for new API endpoint (#17680, #18629)
+* [ndr:base:embed] Improve thumbnails extraction (#23731)
++ [vodplatform] Add support for embed.kwikmotion.com domain
++ [twitter] Add support for promo_video_website cards (#23711)
+* [orf:radio] Clean description and improve extraction
+* [orf:fm4] Fix extraction (#23599)
+* [safari] Fix kaltura session extraction (#23679, #23670)
+* [lego] Fix extraction and extract subtitle (#23687)
+* [cloudflarestream] Improve extraction
+ + Add support for bytehighway.net domain
+ + Add support for signed URLs
+ + Extract thumbnail
+* [naver] Improve extraction
+ * Improve geo-restriction handling
+ + Extract automatic captions
+ + Extract uploader metadata
+ + Extract VLive HLS formats
+ * Improve metadata extraction
+- [pandatv] Remove extractor (#23630)
+* [dctp] Fix format extraction (#23656)
++ [scrippsnetworks] Add support for www.discovery.com videos
+* [discovery] Fix anonymous token extraction (#23650)
+* [nrktv:seriebase] Fix extraction (#23625, #23537)
+* [wistia] Improve format extraction and extract subtitles (#22590)
+* [vice] Improve extraction (#23631)
+* [redtube] Detect private videos (#23518)
+
+
+version 2020.01.01
+
+Extractors
+* [brightcove] Invalidate policy key cache on failing requests
+* [pornhub] Improve locked videos detection (#22449, #22780)
++ [pornhub] Add support for m3u8 formats
+* [pornhub] Fix extraction (#22749, #23082)
+* [brightcove] Update policy key on failing requests
+* [spankbang] Improve removed video detection (#23423)
+* [spankbang] Fix extraction (#23307, #23423, #23444)
+* [soundcloud] Automatically update client id on failing requests
+* [prosiebensat1] Improve geo restriction handling (#23571)
+* [brightcove] Cache brightcove player policy keys
+* [teachable] Fail with error message if no video URL found
+* [teachable] Improve locked lessons detection (#23528)
++ [scrippsnetworks] Add support for Scripps Networks sites (#19857, #22981)
+* [mitele] Fix extraction (#21354, #23456)
+* [soundcloud] Update client id (#23516)
+* [mailru] Relax URL regular expressions (#23509)
+
+
+version 2019.12.25
+
+Core
+* [utils] Improve str_to_int
++ [downloader/hls] Add ability to override AES decryption key URL (#17521)
+
+Extractors
+* [mediaset] Fix parse formats (#23508)
++ [tv2dk:bornholm:play] Add support for play.tv2bornholm.dk (#23291)
++ [slideslive] Add support for url and vimeo service names (#23414)
+* [slideslive] Fix extraction (#23413)
+* [twitch:clips] Fix extraction (#23375)
++ [soundcloud] Add support for token protected embeds (#18954)
+* [vk] Improve extraction
+ * Fix User Videos extraction (#23356)
+ * Extract all videos for lists with more than 1000 videos (#23356)
+ + Add support for video albums (#14327, #14492)
+- [kontrtube] Remove extractor
+- [videopremium] Remove extractor
+- [musicplayon] Remove extractor (#9225)
++ [ufctv] Add support for ufcfightpass.imgdge.com and
+ ufcfightpass.imggaming.com (#23343)
++ [twitch] Extract m3u8 formats frame rate (#23333)
++ [imggaming] Add support for playlists and extract subtitles
++ [ufcarabia] Add support for UFC Arabia (#23312)
+* [ufctv] Fix extraction
+* [yahoo] Fix gyao brightcove player id (#23303)
+* [vzaar] Override AES decryption key URL (#17521)
++ [vzaar] Add support for AES HLS manifests (#17521, #23299)
+* [nrl] Fix extraction
+* [teachingchannel] Fix extraction
+* [nintendo] Fix extraction and partially add support for Nintendo Direct
+ videos (#4592)
++ [ooyala] Add better fallback values for domain and streams variables
++ [youtube] Add support for youtubekids.com (#23272)
+* [tv2] Detect DRM protection
++ [tv2] Add support for katsomo.fi and mtv.fi (#10543)
+* [tv2] Fix tv2.no article extraction
+* [msn] Improve extraction
+ + Add support for YouTube and NBCSports embeds
+ + Add support for articles with multiple videos
+ * Improve AOL embed support
+ * Improve format extraction
+* [abcotvs] Relax URL regular expression and improve metadata extraction
+ (#18014)
+* [channel9] Reduce response size
+* [adobetv] Improve extraction
+ * Use OnDemandPagedList for list extractors
+ * Reduce show extraction requests
+ * Extract original video format and subtitles
+ + Add support for adobe tv embeds
+
+
+version 2019.11.28
+
+Core
++ [utils] Add generic caesar cipher and rot47 (sketch below)
+* [utils] Handle rd-suffixed day parts in unified_strdate (#23199)
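+
+A rough sketch of the pair, assuming the 94 printable ASCII characters as the
+rot47 alphabet (signatures are illustrative, not necessarily those of
+utils.py):
+
+    def caesar(s, alphabet, shift):
+        # Rotate each character of s that occurs in alphabet by shift
+        # positions; characters outside the alphabet pass through unchanged
+        if shift == 0:
+            return s
+        l = len(alphabet)
+        return ''.join(
+            alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
+            for c in s)
+
+    def rot47(s):
+        # rot47 is a caesar cipher over the printable ASCII range '!'..'~'
+        return caesar(s, ''.join(chr(c) for c in range(33, 127)), 47)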
+
+Extractors
+* [vimeo] Improve extraction
+ * Fix review extraction
+ * Fix ondemand extraction
+ * Treat password protected player case as an expected error (#22896)
+ * Simplify channel based extractors code
+- [openload] Remove extractor (#11999)
+- [verystream] Remove extractor
+- [streamango] Remove extractor (#15406)
+* [dailymotion] Improve extraction
+ * Extract http formats included in m3u8 manifest
+ * Fix user extraction (#3553, #21415)
+ + Add support for User Authentication (#11491)
+ * Fix password protected videos extraction (#23176)
+ * Respect age limit option and family filter cookie value (#18437)
+ * Handle video url playlist query param
+ * Report allowed countries for geo-restricted videos
+* [corus] Improve extraction
+ + Add support for Series Plus, W Network, YTV, ABC Spark, disneychannel.com
+ and disneylachaine.ca (#20861)
+ + Add support for self hosted videos (#22075)
+ * Detect DRM protection (#14910, #9164)
+* [vivo] Fix extraction (#22328, #22279)
++ [bitchute] Extract upload date (#22990, #23193)
+* [soundcloud] Update client id (#23214)
+
+
+version 2019.11.22
+
+Core
++ [extractor/common] Clean jwplayer description HTML tags
++ [extractor/common] Add data, headers and query to all major extract formats
+ methods
+
+Extractors
+* [chaturbate] Fix extraction (#23010, #23012)
++ [ntvru] Add support for non relative file URLs (#23140)
+* [vk] Fix wall audio thumbnails extraction (#23135)
+* [ivi] Fix format extraction (#21991)
+- [comcarcoff] Remove extractor
++ [drtv] Add support for new URL schema (#23059)
++ [nexx] Add support for Multi Player JS Setup (#23052)
++ [teamcoco] Add support for new videos (#23054)
+* [soundcloud] Check if the soundtrack has downloads left (#23045)
+* [facebook] Fix posts video data extraction (#22473)
+- [addanime] Remove extractor
+- [minhateca] Remove extractor
+- [daisuki] Remove extractor
+* [seeker] Fix extraction
+- [revision3] Remove extractors
+* [twitch] Fix video comments URL (#18593, #15828)
+* [twitter] Improve extraction
+ + Add support for generic embeds (#22168)
+ * Always extract http formats for native videos (#14934)
+ + Add support for Twitter Broadcasts (#21369)
+ + Extract more metadata
+ * Improve VMap format extraction
+ * Unify extraction code for both twitter statuses and cards
++ [twitch] Add support for Clip embed URLs
+* [lnkgo] Fix extraction (#16834)
+* [mixcloud] Improve extraction
+ * Improve metadata extraction (#11721)
+ * Fix playlist extraction (#22378)
+ * Fix user mixes extraction (#15197, #17865)
++ [kinja] Add support for Kinja embeds (#5756, #11282, #22237, #22384)
+* [onionstudios] Fix extraction
++ [hotstar] Pass Referer header to format requests (#22836)
+* [dplay] Minimize response size
++ [patreon] Extract uploader_id and filesize
+* [patreon] Minimize response size
+* [roosterteeth] Fix login request (#16094, #22689)
+
+
+version 2019.11.05
+
+Extractors
++ [scte] Add support for learning.scte.org (#22975)
++ [msn] Add support for Vidible and AOL embeds (#22195, #22227)
+* [myspass] Fix video URL extraction and improve metadata extraction (#22448)
+* [jamendo] Improve extraction
+ * Fix album extraction (#18564)
+ * Improve metadata extraction (#18565, #21379)
+* [mediaset] Relax URL guid matching (#18352)
++ [mediaset] Extract unprotected M3U and MPD manifests (#17204)
+* [telegraaf] Fix extraction
++ [bellmedia] Add support for marilyn.ca videos (#22193)
+* [stv] Fix extraction (#22928)
+- [iconosquare] Remove extractor
+- [keek] Remove extractor
+- [gameone] Remove extractor (#21778)
+- [flipagram] Remove extractor
+- [bambuser] Remove extractor
+* [wistia] Reduce embed extraction false positives
++ [wistia] Add support for inline embeds (#22931)
+- [go90] Remove extractor
+* [kakao] Remove raw request
++ [kakao] Extract format total bitrate
+* [daum] Fix VOD and Clip extraction (#15015)
+* [kakao] Improve extraction
+ + Add support for embed URLs
+ + Add support for Kakao Legacy vid based embed URLs
+ * Only extract fields used for extraction
+ * Strip description and extract tags
+* [mixcloud] Fix cloudcast data extraction (#22821)
+* [yahoo] Improve extraction
+ + Add support for live streams (#3597, #3779, #22178)
+ * Bypass cookie consent page for European domains (#16948, #22576)
+ + Add generic support for embeds (#20332)
+* [tv2] Fix and improve extraction (#22787)
++ [tv2dk] Add support for TV2 DK sites
+* [onet] Improve extraction
+ + Add support for onet100.vod.pl
+ + Extract m3u8 formats
+ * Correct audio only format info
+* [fox9] Fix extraction
+
+
+version 2019.10.29
+
+Core
+* [utils] Actualize major IPv4 address blocks per country
+
+Extractors
++ [go] Add support for abc.com and freeform.com (#22823, #22864)
++ [mtv] Add support for mtvjapan.com
+* [mtv] Fix extraction for mtv.de (#22113)
+* [videodetective] Fix extraction
+* [internetvideoarchive] Fix extraction
+* [nbcnews] Fix extraction (#12569, #12576, #21703, #21923)
+- [hark] Remove extractor
+- [tutv] Remove extractor
+- [learnr] Remove extractor
+- [macgamestore] Remove extractor
+* [la7] Update Kaltura service URL (#22358)
+* [thesun] Fix extraction (#16966)
+- [makertv] Remove extractor
++ [tenplay] Add support for 10play.com.au (#21446)
+* [soundcloud] Improve extraction
+ * Improve format extraction (#22123)
+ + Extract uploader_id and uploader_url (#21916)
+ + Extract all known thumbnails (#19071, #20659)
+ * Fix extraction for private playlists (#20976)
+ + Add support for playlist embeds (#20976)
+ * Skip preview formats (#22806)
+* [dplay] Improve extraction
+ + Add support for dplay.fi, dplay.jp and es.dplay.com (#16969)
+ * Fix it.dplay.com extraction (#22826)
+ + Extract creator, tags and thumbnails
+ * Handle playback API call errors
++ [discoverynetworks] Add support for dplay.co.uk
+* [vk] Improve extraction
+ + Add support for Odnoklassniki embeds
+ + Extract more videos from user lists (#4470)
+ + Fix wall post audio extraction (#18332)
+ * Improve error detection (#22568)
++ [odnoklassniki] Add support for embeds
+* [puhutv] Improve extraction
+ * Fix subtitles extraction
+ * Transform HLS URLs to HTTP URLs
+ * Improve metadata extraction
+* [ceskatelevize] Skip DRM media
++ [facebook] Extract subtitles (#22777)
+* [globo] Handle alternative hash signing method
+
+
+version 2019.10.22
+
+Core
+* [utils] Improve subtitles_filename (#22753)
+
+Extractors
+* [facebook] Bypass download rate limits (#21018)
++ [contv] Add support for contv.com
+- [viewster] Remove extractor
+* [xfileshare] Improve extractor (#17032, #17906, #18237, #18239)
+ * Update the list of domains
+ + Add support for aa-encoded video data
+ * Improve jwplayer format extraction
+ + Add support for Clappr sources
+* [mangomolo] Fix video format extraction and add support for player URLs
+* [audioboom] Improve metadata extraction
+* [twitch] Update VOD URL matching (#22395, #22727)
+- [mit] Remove support for video.mit.edu (#22403)
+- [servingsys] Remove extractor (#22639)
+* [dumpert] Fix extraction (#22428, #22564)
+* [atresplayer] Fix extraction (#16277, #16716)
+
+
+version 2019.10.16
+
+Core
+* [extractor/common] Make _is_valid_url more relaxed
+
+Extractors
+* [vimeo] Improve album videos id extraction (#22599)
++ [globo] Extract subtitles (#22713)
+* [bokecc] Improve player params extraction (#22638)
+* [nexx] Handle result list (#22666)
+* [vimeo] Fix VHX embed extraction
+* [nbc] Switch to graphql API (#18581, #22693, #22701)
+- [vessel] Remove extractor
+- [promptfile] Remove extractor (#6239)
+* [kaltura] Fix service URL extraction (#22658)
+* [kaltura] Fix embed info strip (#22658)
+* [globo] Fix format extraction (#20319)
+* [redtube] Improve metadata extraction (#22492, #22615)
+* [pornhub:uservideos:upload] Fix extraction (#22619)
++ [telequebec:squat] Add support for squat.telequebec.tv (#18503)
+- [wimp] Remove extractor (#22088, #22091)
++ [gfycat] Extend URL regular expression (#22225)
++ [chaturbate] Extend URL regular expression (#22309)
+* [peertube] Update instances (#22414)
++ [telequebec] Add support for coucou.telequebec.tv (#22482)
++ [xvideos] Extend URL regular expression (#22471)
+- [youtube] Remove support for invidious.enkirton.net (#22543)
++ [openload] Add support for oload.monster (#22592)
+* [nrktv:seriebase] Fix extraction (#22596)
++ [youtube] Add support for yt.lelux.fi (#22597)
+* [orf:tvthek] Make manifest requests non fatal (#22578)
+* [teachable] Skip login when already logged in (#22572)
+* [viewlift] Improve extraction (#22545)
+* [nonktube] Fix extraction (#22544)
+
+
+version 2019.09.28
+
+Core
+* [YoutubeDL] Honour all --get-* options with --flat-playlist (#22493)
+
+Extractors
+* [vk] Fix extraction (#22522)
+* [heise] Fix kaltura embeds extraction (#22514)
+* [ted] Check for resources validity and extract subtitled downloads (#22513)
++ [youtube] Add support for
+ owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya.b32.i2p (#22292)
++ [nhk] Add support for clips
+* [nhk] Fix video extraction (#22249, #22353)
+* [byutv] Fix extraction (#22070)
++ [openload] Add support for oload.online (#22304)
++ [youtube] Add support for invidious.drycat.fr (#22451)
+* [jwplatform] Do not match video URLs (#20596, #22148)
+* [youtube:playlist] Unescape playlist uploader (#22483)
++ [bilibili] Add support for audio albums and songs (#21094)
++ [instagram] Add support for tv URLs
++ [mixcloud] Allow uppercase letters in format URLs (#19280)
+* [brightcove] Delegate all supported legacy URLs to new extractor (#11523,
+ #12842, #13912, #15669, #16303)
+* [hotstar] Use native HLS downloader by default
++ [hotstar] Extract more formats (#22323)
+* [9now] Fix extraction (#22361)
+* [zdf] Bypass geo restriction
++ [tv4] Extract series metadata
+* [tv4] Fix extraction (#22443)
+
+
+version 2019.09.12.1
+
+Extractors
+* [youtube] Remove quality and tbr for itag 43 (#22372)
+
+
+version 2019.09.12
+
+Extractors
+* [youtube] Quick extraction tempfix (#22367, #22163)
+
+
+version 2019.09.01
+
+Core
++ [extractor/generic] Add support for squarespace embeds (#21294, #21802,
+ #21859)
++ [downloader/external] Respect mtime option for aria2c (#22242)
+
+Extractors
++ [xhamster:user] Add support for user pages (#16330, #18454)
++ [xhamster] Add support for more domains
++ [verystream] Add support for woof.tube (#22217)
++ [dailymotion] Add support for lequipe.fr (#21328, #22152)
++ [openload] Add support for oload.vip (#22205)
++ [bbccouk] Extend URL regular expression (#19200)
++ [youtube] Add support for invidious.nixnet.xyz and yt.elukerio.org (#22223)
+* [safari] Fix authentication (#22161, #22184)
+* [usanetwork] Fix extraction (#22105)
++ [einthusan] Add support for einthusan.ca (#22171)
+* [youtube] Improve unavailable message extraction (#22117)
++ [piksel] Extract subtitles (#20506)
+
+
+version 2019.08.13
+
+Core
+* [downloader/fragment] Fix ETA calculation of resumed download (#21992)
+* [YoutubeDL] Check annotations availability (#18582)
+
+Extractors
+* [youtube:playlist] Improve flat extraction (#21927)
+* [youtube] Fix annotations extraction (#22045)
++ [discovery] Extract series meta field (#21808)
+* [youtube] Improve error detection (#16445)
+* [vimeo] Fix album extraction (#1933, #15704, #15855, #18967, #21986)
++ [roosterteeth] Add support for watch URLs
+* [discovery] Limit video data by show slug (#21980)
+
+
+version 2019.08.02
+
+Extractors
++ [tvigle] Add support for HLS and DASH formats (#21967)
+* [tvigle] Fix extraction (#21967)
++ [yandexvideo] Add support for DASH formats (#21971)
+* [discovery] Use API call for video data extraction (#21808)
++ [mgtv] Extract format_note (#21881)
+* [tvn24] Fix metadata extraction (#21833, #21834)
+* [dlive] Relax URL regular expression (#21909)
++ [openload] Add support for oload.best (#21913)
+* [youtube] Improve metadata extraction for age gate content (#21943)
+
+
+version 2019.07.30
+
+Extractors
+* [youtube] Fix and improve title and description extraction (#21934)
+
+
+version 2019.07.27
+
+Extractors
++ [yahoo:japannews] Add support for yahoo.co.jp (#21698, #21265)
++ [discovery] Add support for go.discovery.com URLs
+* [youtube:playlist] Relax video regular expression (#21844)
+* [generic] Restrict --default-search schemeless URLs detection pattern
+ (#21842)
+* [vrv] Fix CMS signing query extraction (#21809)
+
+
+version 2019.07.16
+
+Extractors
++ [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv
+ (#21281, #21290)
+* [kaltura] Check source format URL (#21290)
+* [ctsnews] Fix YouTube embeds extraction (#21678)
++ [einthusan] Add support for einthusan.com (#21748, #21775)
++ [youtube] Add support for invidious.mastodon.host (#21777)
++ [gfycat] Extend URL regular expression (#21779, #21780)
+* [youtube] Restrict is_live extraction (#21782)
+
+
+version 2019.07.14
+
+Extractors
+* [porn91] Fix extraction (#21312)
++ [yandexmusic] Extract track number and disk number (#21421)
++ [yandexmusic] Add support for multi disk albums (#21420, #21421)
+* [lynda] Handle missing subtitles (#20490, #20513)
++ [youtube] Add more invidious instances to URL regular expression (#21694)
+* [twitter] Improve uploader id extraction (#21705)
+* [spankbang] Fix and improve metadata extraction
+* [spankbang] Fix extraction (#21763, #21764)
++ [dlive] Add support for dlive.tv (#18080)
++ [livejournal] Add support for livejournal.com (#21526)
+* [roosterteeth] Fix free episode extraction (#16094)
+* [dbtv] Fix extraction
+* [bellator] Fix extraction
+- [rudo] Remove extractor (#18430, #18474)
+* [facebook] Fallback to twitter:image meta for thumbnail extraction (#21224)
+* [bleacherreport] Fix Bleacher Report CMS extraction
+* [espn] Fix fivethirtyeight.com extraction
+* [5tv] Relax video URL regular expression and support https URLs
+* [youtube] Fix is_live extraction (#21734)
+* [youtube] Fix authentication (#11270)
+
+
+version 2019.07.12
+
+Core
++ [adobepass] Add support for AT&T U-verse (mso ATT) (#13938, #21016)
+
+Extractors
++ [mgtv] Pass Referer HTTP header for format URLs (#21726)
++ [beeg] Add support for api/v6 v2 URLs without t argument (#21701)
+* [voxmedia:volume] Improve vox embed extraction (#16846)
+* [funnyordie] Move extraction to VoxMedia extractor (#16846)
+* [gameinformer] Fix extraction (#8895, #15363, #17206)
+* [funk] Fix extraction (#17915)
+* [packtpub] Relax lesson URL regular expression (#21695)
+* [packtpub] Fix extraction (#21268)
+* [philharmoniedeparis] Relax URL regular expression (#21672)
+* [peertube] Detect embed URLs in generic extraction (#21666)
+* [mixer:vod] Relax URL regular expression (#21657, #21658)
++ [lecturio] Add support for id based URLs (#21630)
++ [go] Add site info for disneynow (#21613)
+* [ted] Restrict info regular expression (#21631)
+* [twitch:vod] Actualize m3u8 URL (#21538, #21607)
+* [vzaar] Fix videos with empty title (#21606)
+* [tvland] Fix extraction (#21384)
+* [arte] Clean extractor (#15583, #21614)
+
+
+version 2019.07.02
+
+Core
++ [utils] Introduce random_user_agent and use as default User-Agent (#21546)
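+
+A minimal sketch of the idea behind random_user_agent, assuming a pool of
+real Chrome version strings (the versions below are illustrative):
+
+    import random
+
+    _CHROME_VERSIONS = ('74.0.3729.129', '75.0.3770.100', '76.0.3809.87')
+
+    def random_user_agent():
+        # Impersonate a randomly picked desktop Chrome build
+        return ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
+                'AppleWebKit/537.36 (KHTML, like Gecko) '
+                'Chrome/%s Safari/537.36' % random.choice(_CHROME_VERSIONS))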
+
+Extractors
++ [vevo] Add support for embed.vevo.com URLs (#21565)
++ [openload] Add support for oload.biz (#21574)
+* [xiami] Update API base URL (#21575)
+* [yourporn] Fix extraction (#21585)
++ [acast] Add support for URLs with episode id (#21444)
++ [dailymotion] Add support for DM.player embeds
+* [soundcloud] Update client id
+
+
+version 2019.06.27
+
+Extractors
++ [go] Add support for disneynow.com (#21528)
+* [mixer:vod] Relax URL regular expression (#21531, #21536)
+* [drtv] Relax URL regular expression
+* [fusion] Fix extraction (#17775, #21269)
+- [nfb] Remove extractor (#21518)
++ [beeg] Add support for api/v6 v2 URLs (#21511)
++ [brightcove:new] Add support for playlists (#21331)
++ [openload] Add support for oload.life (#21495)
+* [vimeo:channel,group] Make title extraction non fatal
+* [vimeo:likes] Implement extractor in terms of channel extractor (#21493)
++ [pornhub] Add support for more paged video sources
++ [pornhub] Add support for downloading single pages and search pages (#15570)
+* [pornhub] Rework extractors (#11922, #16078, #17454, #17936)
++ [youtube] Add another signature function pattern
+* [tf1] Fix extraction (#21365, #21372)
+* [crunchyroll] Move Accept-Language workaround to video extractor since
+ it causes playlists not to list any videos
+* [crunchyroll:playlist] Fix and relax title extraction (#21291, #21443)
+
+
+version 2019.06.21
+
+Core
+* [utils] Restrict parse_codecs and add theora as known vcodec (#21381)
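+
+A simplified sketch of the restricted parsing (the known-codec tuples are
+abbreviated and illustrative):
+
+    KNOWN_VCODECS = ('avc1', 'avc3', 'vp8', 'vp9', 'hev1', 'av01', 'theora')
+    KNOWN_ACODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3')
+
+    def parse_codecs(codecs_str):
+        # 'avc1.64001f, mp4a.40.2'
+        #     -> {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2'}
+        vcodec = acodec = None
+        for full_codec in (c.strip() for c in (codecs_str or '').split(',')):
+            codec = full_codec.split('.')[0]
+            if codec in KNOWN_VCODECS and not vcodec:
+                vcodec = full_codec
+            elif codec in KNOWN_ACODECS and not acodec:
+                acodec = full_codec
+        if not vcodec and not acodec:
+            return {}
+        return {'vcodec': vcodec or 'none', 'acodec': acodec or 'none'}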
+
+Extractors
+* [youtube] Update signature function patterns (#21469, #21476)
+* [youtube] Make --write-annotations non fatal (#21452)
++ [sixplay] Add support for rtlmost.hu (#21405)
+* [youtube] Hardcode codec metadata for av01 video only formats (#21381)
+* [toutv] Update client key (#21370)
++ [biqle] Add support for new embed domain
+* [cbs] Improve DRM protected videos detection (#21339)
+
+
+version 2019.06.08
+
+Core
+* [downloader/common] Improve rate limit (#21301)
+* [utils] Improve strip_or_none
+* [extractor/common] Strip src attribute for HTML5 entries code (#18485,
+ #21169)
+
+Extractors
+* [ted] Fix playlist extraction (#20844, #21032)
+* [vlive:playlist] Fix video extraction when no playlist is found (#20590)
++ [vlive] Add CH+ support (#16887, #21209)
++ [openload] Add support for oload.website (#21329)
++ [tvnow] Extract HD formats (#21201)
++ [redbulltv] Add support for rrn:content URLs (#21297)
+* [youtube] Fix average rating extraction (#21304)
++ [bitchute] Extract HTML5 formats (#21306)
+* [cbsnews] Fix extraction (#9659, #15397)
+* [vvvvid] Relax URL regular expression (#21299)
++ [prosiebensat1] Add support for new API (#21272)
++ [vrv] Extract adaptive_hls formats (#21243)
+* [viki] Switch to HTTPS (#21001)
+* [LiveLeak] Check if the original videos exist (#21206, #21208)
+* [rtp] Fix extraction (#15099)
+* [youtube] Improve DRM protected videos detection (#1774)
++ [srgssrplay] Add support for popupvideoplayer URLs (#21155)
++ [24video] Add support for porno.24video.net (#21194)
++ [24video] Add support for 24video.site (#21193)
+- [pornflip] Remove extractor
+- [criterion] Remove extractor (#21195)
+* [pornhub] Use HTTPS (#21061)
+* [bitchute] Fix uploader extraction (#21076)
+* [streamcloud] Reduce waiting time to 6 seconds (#21092)
+- [novamov] Remove extractors (#21077)
++ [openload] Add support for oload.press (#21135)
+* [vivo] Fix extraction (#18906, #19217)
+
+
+version 2019.05.20
+
+Core
++ [extractor/common] Move workaround for applying first Set-Cookie header
+ into a separate _apply_first_set_cookie_header method
+
+Extractors
+* [safari] Fix authentication (#21090)
+* [vk] Use _apply_first_set_cookie_header
+* [vrt] Fix extraction (#20527)
++ [canvas] Add support for vrtnieuws and sporza site ids and extract
+ AES HLS formats
++ [vrv] Extract captions (#19238)
+* [tele5] Improve video id extraction
+* [tele5] Relax URL regular expression (#21020, #21063)
+* [svtplay] Update API URL (#21075)
++ [yahoo:gyao] Add X-User-Agent header to dam proxy requests (#21071)
+
+
+version 2019.05.11
+
+Core
+* [utils] Transliterate "þ" as "th" (#20897)
+
+Extractors
++ [cloudflarestream] Add support for videodelivery.net (#21049)
++ [byutv] Add support for DVR videos (#20574, #20676)
++ [gfycat] Add support for URLs with tags (#20696, #20731)
++ [openload] Add support for verystream.com (#20701, #20967)
+* [youtube] Use sp field value for signature field name (#18841, #18927,
+ #21028)
++ [yahoo:gyao] Extend URL regular expression (#21008)
+* [youtube] Fix channel id extraction (#20982, #21003)
++ [sky] Add support for news.sky.com (#13055)
++ [youtube:entrylistbase] Retry on 5xx HTTP errors (#20965)
++ [francetvinfo] Extend video id extraction (#20619, #20740)
+* [4tube] Update token hosts (#20918)
+* [hotstar] Move to API v2 (#20931)
+* [fox] Fix API error handling under python 2 (#20925)
++ [redbulltv] Extend URL regular expression (#20922)
+
+
+version 2019.04.30
+
+Extractors
+* [openload] Use real Chrome versions (#20902)
+- [youtube] Remove info el for get_video_info request
+* [youtube] Improve extraction robustness
+- [dramafever] Remove extractor (#20868)
+* [adn] Fix subtitle extraction (#12724)
++ [ccc] Extract creator (#20355)
++ [ccc:playlist] Add support for media.ccc.de playlists (#14601, #20355)
++ [sverigesradio] Add support for sverigesradio.se (#18635)
++ [cinemax] Add support for cinemax.com
+* [sixplay] Try extracting non-DRM protected manifests (#20849)
++ [youtube] Extract Youtube Music Auto-generated metadata (#20599, #20742)
+- [wrzuta] Remove extractor (#20684, #20801)
+* [twitch] Prefer source format (#20850)
++ [twitcasting] Add support for private videos (#20843)
+* [reddit] Validate thumbnail URL (#20030)
+* [yandexmusic] Fix track URL extraction (#20820)
+
+
+version 2019.04.24
+
+Extractors
+* [youtube] Fix extraction (#20758, #20759, #20761, #20762, #20764, #20766,
+ #20767, #20769, #20771, #20768, #20770)
+* [toutv] Fix extraction and extract series info (#20757)
++ [vrv] Add support for movie listings (#19229)
++ [youtube] Print error when no data is available (#20737)
++ [soundcloud] Add support for new rendition and improve extraction (#20699)
++ [ooyala] Add support for geo verification proxy
++ [nrl] Add support for nrl.com (#15991)
++ [vimeo] Extract live archive source format (#19144)
++ [vimeo] Add support for live streams and improve info extraction (#19144)
++ [ntvcojp] Add support for cu.ntv.co.jp
++ [nhk] Extract RTMPT format
++ [nhk] Add support for audio URLs
++ [udemy] Add another course id extraction pattern (#20491)
++ [openload] Add support for oload.services (#20691)
++ [openload] Add support for openloed.co (#20691, #20693)
+* [bravotv] Fix extraction (#19213)
+
+
+version 2019.04.17
+
+Extractors
+* [openload] Randomize User-Agent (#20688)
++ [openload] Add support for oladblock domains (#20471)
+* [adn] Fix subtitle extraction (#12724)
++ [aol] Add support for localized websites
++ [yahoo] Add support for GYAO episode URLs
++ [yahoo] Add support for streaming.yahoo.co.jp (#5811, #7098)
++ [yahoo] Add support for gyao.yahoo.co.jp
+* [aenetworks] Fix history topic extraction and extract more formats
++ [cbs] Extract smpte and vtt subtitles
++ [streamango] Add support for streamcherry.com (#20592)
++ [yourporn] Add support for sxyprn.com (#20646)
+* [mgtv] Fix extraction (#20650)
+* [linkedin:learning] Use urljoin for form action URL (#20431)
++ [gdc] Add support for kaltura embeds (#20575)
+* [dispeak] Improve mp4 bitrate extraction
+* [kaltura] Sanitize embed URLs
+* [jwplatform] Do not match manifest URLs (#20596)
+* [aol] Restrict URL regular expression and improve format extraction
++ [tiktok] Add support for new URL schema (#20573)
++ [stv:player] Add support for player.stv.tv (#20586)
+
+
+version 2019.04.07
+
+Core
++ [downloader/external] Pass rtmp_conn to ffmpeg
+
+Extractors
++ [ruutu] Add support for audio podcasts (#20473, #20545)
++ [xvideos] Extract all thumbnails (#20432)
++ [platzi] Add support for platzi.com (#20562)
+* [dvtv] Fix extraction (#18514, #19174)
++ [vrv] Add basic support for individual movie links (#19229)
++ [bfi:player] Add support for player.bfi.org.uk (#19235)
+* [hbo] Fix extraction and extract subtitles (#14629, #13709)
+* [youtube] Extract srv[1-3] subtitle formats (#20566)
+* [adultswim] Fix extraction (#18025)
+* [teamcoco] Fix extraction and add support for subdomains (#17099, #20339)
+* [adn] Fix subtitle compatibility with ffmpeg
+* [adn] Fix extraction and add support for positioning styles (#20549)
+* [vk] Use unique video id (#17848)
+* [newstube] Fix extraction
+* [rtl2] Actualize extraction
++ [adobeconnect] Add support for adobeconnect.com (#20283)
++ [gaia] Add support for authentication (#14605)
++ [mediasite] Add support for dashed ids and named catalogs (#20531)
+
+
+version 2019.04.01
+
+Core
+* [utils] Improve int_or_none and float_or_none (#20403) (sketch below)
+* Check for valid --min-sleep-interval when --max-sleep-interval is specified
+ (#20435)
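+
+A simplified sketch of the tolerant conversion helpers mentioned above (the
+real ones also support scaling and attribute lookup):
+
+    def int_or_none(v, default=None):
+        # '123' -> 123; None, '' and junk like 'N/A' -> default, never raises
+        try:
+            return int(v)
+        except (ValueError, TypeError):
+            return default
+
+    def float_or_none(v, default=None):
+        try:
+            return float(v)
+        except (ValueError, TypeError):
+            return default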
+
+Extractors
++ [weibo] Extend URL regular expression (#20496)
++ [xhamster] Add support for xhamster.one (#20508)
++ [mediasite] Add support for catalogs (#20507)
++ [teamtreehouse] Add support for teamtreehouse.com (#9836)
++ [ina] Add support for audio URLs
+* [ina] Improve extraction
+* [cwtv] Fix episode number extraction (#20461)
+* [npo] Improve DRM detection
++ [pornhub] Add support for DASH formats (#20403)
+* [svtplay] Update API endpoint (#20430)
+
+
+version 2019.03.18
+
+Core
+* [extractor/common] Improve HTML5 entries extraction
++ [utils] Introduce parse_bitrate (sketch below)
+* [update] Hide update URLs behind redirect
+* [extractor/common] Fix url meta field for unfragmented DASH formats (#20346)
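+
+A sketch of parse_bitrate, assuming kbps-denominated input strings:
+
+    import re
+
+    def parse_bitrate(s):
+        # 'Bitrate: 1000 kbps' -> 1000; None when no kbps figure is present
+        if not isinstance(s, str):
+            return None
+        mobj = re.search(r'\b(\d+)\s*kbps', s)
+        return int(mobj.group(1)) if mobj else None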
+
+Extractors
++ [yandexvideo] Add extractor
+* [openload] Improve embed detection
++ [corus] Add support for bigbrothercanada.ca (#20357)
++ [orf:radio] Extract series (#20012)
++ [cbc:watch] Add support for gem.cbc.ca (#20251, #20359)
+- [anysex] Remove extractor (#19279)
++ [ciscolive] Add support for new URL schema (#20320, #20351)
++ [youtube] Add support for invidiou.sh (#20309)
+- [anitube] Remove extractor (#20334)
+- [ruleporn] Remove extractor (#15344, #20324)
+* [npr] Fix extraction (#10793, #13440)
+* [biqle] Fix extraction (#11471, #15313)
+* [viddler] Modernize
+* [moevideo] Fix extraction
+* [primesharetv] Remove extractor
+* [hypem] Modernize and extract more metadata (#15320)
+* [veoh] Fix extraction
+* [escapist] Modernize
+- [videomega] Remove extractor (#10108)
++ [beeg] Add support for beeg.porn (#20306)
+* [vimeo:review] Improve config url extraction and extract original format
+ (#20305)
+* [fox] Detect geo restriction and authentication errors (#20208)
+
+
+version 2019.03.09
+
+Core
+* [extractor/common] Use compat_etree_Element
++ [compat] Introduce compat_etree_Element
+* [extractor/common] Fallback url to base URL for DASH formats
+* [extractor/common] Do not fail on invalid data while parsing F4M manifest
+ in non fatal mode
+* [extractor/common] Return MPD manifest as format's url meta field (#20242)
+* [utils] Strip #HttpOnly_ prefix from cookies files (#20219)
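+
+Tools like curl mark HttpOnly cookies in Netscape cookie files with a
+'#HttpOnly_' prefix, which a plain cookiejar parser skips as a comment;
+a sketch of the per-line workaround:
+
+    def strip_httponly_prefix(line):
+        # '#HttpOnly_.example.com\tTRUE\t...' -> '.example.com\tTRUE\t...'
+        prefix = '#HttpOnly_'
+        return line[len(prefix):] if line.startswith(prefix) else line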
+
+Extractors
+* [francetv:site] Relax video id regular expression (#20268)
+* [toutv] Detect invalid login error
+* [toutv] Fix authentication (#20261)
++ [urplay] Extract timestamp (#20235)
++ [openload] Add support for oload.space (#20246)
+* [facebook] Improve uploader extraction (#20250)
+* [bbc] Use compat_etree_Element
+* [crunchyroll] Use compat_etree_Element
+* [npo] Improve ISM extraction
+* [rai] Improve extraction (#20253)
+* [paramountnetwork] Fix mgid extraction (#20241)
+* [libsyn] Improve extraction (#20229)
++ [youtube] Add more invidious instances to URL regular expression (#20228)
+* [spankbang] Fix extraction (#20023)
+* [espn] Extend URL regular expression (#20013)
+* [sixplay] Handle videos with empty assets (#20016)
++ [vimeo] Add support for Vimeo Pro portfolio protected videos (#20070)
+
+
+version 2019.03.01
+
+Core
++ [downloader/external] Add support for rate limit and retries for wget
+* [downloader/external] Fix infinite retries for curl (#19303)
+
+Extractors
+* [npo] Fix extraction (#20084)
+* [francetv:site] Extend video id regex (#20029, #20071)
++ [periscope] Extract width and height (#20015)
+* [servus] Fix extraction (#19297)
+* [bbccouk] Make subtitles non fatal (#19651)
+* [metacafe] Fix family filter bypass (#19287)
+
+
+version 2019.02.18
+
+Extractors
+* [tvp:website] Fix and improve extraction
++ [tvp] Detect unavailable videos
+* [tvp] Fix description extraction and make thumbnail optional
++ [linuxacademy] Add support for linuxacademy.com (#12207)
+* [bilibili] Update keys (#19233)
+* [udemy] Extend URL regular expressions (#14330, #15883)
+* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126)
+* [noovo] Fix extraction (#19230)
+* [rai] Relax URL regular expression (#19232)
++ [vshare] Pass Referer to download request (#19205, #19221)
++ [openload] Add support for oload.live (#19222)
+* [imgur] Use video id as title fallback (#18590)
++ [twitch] Add new source format detection approach (#19193)
+* [tvplayhome] Fix video id extraction (#19190)
+* [tvplayhome] Fix episode metadata extraction (#19190)
+* [rutube:embed] Fix extraction (#19163)
++ [rutube:embed] Add support for private videos (#19163)
++ [soundcloud] Extract more metadata
++ [trunews] Add support for trunews.com (#19153)
++ [linkedin:learning] Extract chapter_number and chapter_id (#19162)
+
+
+version 2019.02.08
+
+Core
+* [utils] Improve JSON-LD regular expression (#18058)
+* [YoutubeDL] Fallback to ie_key of matching extractor while making
+ download archive id when no explicit ie_key is provided (#19022)
+
+Extractors
++ [malltv] Add support for mall.tv (#18058, #17856)
++ [spankbang:playlist] Add support for playlists (#19145)
+* [spankbang] Extend URL regular expression
+* [trutv] Fix extraction (#17336)
+* [toutv] Fix authentication (#16398, #18700)
+* [pornhub] Fix tags and categories extraction (#13720, #19135)
+* [pornhd] Fix formats extraction
++ [pornhd] Extract like count (#19123, #19125)
+* [radiocanada] Switch to the new media requests (#19115)
++ [teachable] Add support for courses.workitdaily.com (#18871)
+- [vporn] Remove extractor (#16276)
++ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086)
++ [drtuber] Extract duration (#19078)
+* [soundcloud] Fix paged playlists extraction, add support for albums and
+ update client id
+* [soundcloud] Update client id
+* [drtv] Improve preference (#19079)
++ [openload] Add support for openload.pw and oload.pw (#18930)
++ [openload] Add support for oload.info (#19073)
+* [crackle] Authorize media detail request (#16931)
+
+
+version 2019.01.30.1
+
+Core
+* [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (#19067)
+
+
+version 2019.01.30
+
+Core
+* [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding
+ subtitles (#19024, #19042)
+* [postprocessor/ffmpeg] Disable "Last message repeated" messages (#19025)
+
+Extractors
+* [yourporn] Fix extraction and extract duration (#18815, #18852, #19061)
+* [drtv] Improve extraction (#19039)
+ + Add support for EncryptedUri videos
+ + Extract more metadata
+ * Fix subtitles extraction
++ [fox] Add support for locked videos using cookies (#19060)
+* [fox] Fix extraction for free videos (#19060)
++ [zattoo] Add support for tv.salt.ch (#19059)
+
+
+version 2019.01.27
+
+Core
++ [extractor/common] Extract season in _json_ld
+* [postprocessor/ffmpeg] Fallback to ffmpeg/avconv for audio codec detection
+ (#681)
+
+Extractors
+* [vice] Fix extraction for locked videos (#16248)
++ [wakanim] Detect DRM protected videos
++ [wakanim] Add support for wakanim.tv (#14374)
+* [usatoday] Fix extraction for videos with custom brightcove partner id
+ (#18990)
+* [drtv] Fix extraction (#18989)
+* [nhk] Extend URL regular expression (#18968)
+* [go] Fix Adobe Pass requests for Disney Now (#18901)
++ [openload] Add support for oload.club (#18969)
+
+
+version 2019.01.24
+
+Core
+* [YoutubeDL] Fix negation for string operators in format selection (#18961)
+
+
+version 2019.01.23
+
+Core
+* [utils] Fix urljoin for paths with non-http(s) schemes
+* [extractor/common] Improve jwplayer relative URL handling (#18892)
++ [YoutubeDL] Add negation support for string comparisons in format selection
+ expressions (#18600, #18805) (usage sketch below)
+* [extractor/common] Improve HLS video-only format detection (#18923)
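+
+A hedged usage sketch of the negated string operators via the Python API
+(URL and format id are placeholders; '!^=' negates the '^=' starts-with
+operator):
+
+    from hypervideo_dl import YoutubeDL
+
+    # Pick the best format whose format_id does NOT start with 'hls'
+    with YoutubeDL({'format': 'best[format_id!^=hls]'}) as ydl:
+        ydl.download(['https://example.com/some-video'])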
+
+Extractors
+* [crunchyroll] Extend URL regular expression (#18955)
+* [pornhub] Bypass scrape detection (#4822, #5930, #7074, #10175, #12722,
+ #17197, #18338, #18842, #18899)
++ [vrv] Add support for authentication (#14307)
+* [videomore:season] Fix extraction
+* [videomore] Improve extraction (#18908)
++ [tnaflix] Pass Referer in metadata request (#18925)
+* [radiocanada] Relax DRM check (#18608, #18609)
+* [vimeo] Fix video password verification for videos protected by
+ Referer HTTP header
++ [hketv] Add support for hkedcity.net (#18696)
++ [streamango] Add support for fruithosts.net (#18710)
++ [instagram] Add support for tags (#18757)
++ [odnoklassniki] Detect paid videos (#18876)
+* [ted] Correct acodec for HTTP formats (#18923)
+* [cartoonnetwork] Fix extraction (#15664, #17224)
+* [vimeo] Fix extraction for password protected player URLs (#18889)
+
+
+version 2019.01.17
+
+Extractors
+* [youtube] Extend JS player signature function name regular expressions
+ (#18890, #18891, #18893)
+
+
+version 2019.01.16
+
+Core
++ [test/helper] Add support for maxcount and count collection len checkers
+* [downloader/hls] Fix uplynk ad skipping (#18824)
+* [postprocessor/ffmpeg] Improve ffmpeg version parsing (#18813)
+
+Extractors
+* [youtube] Skip unsupported adaptive stream type (#18804)
++ [youtube] Extract DASH formats from player response (#18804)
+* [funimation] Fix extraction (#14089)
+* [skylinewebcams] Fix extraction (#18853)
++ [curiositystream] Add support for non app URLs
++ [bitchute] Check formats (#18833)
+* [wistia] Extend URL regular expression (#18823)
++ [playplustv] Add support for playplus.com (#18789)
+
+
+version 2019.01.10
+
+Core
+* [extractor/common] Use episode name as title in _json_ld
++ [extractor/common] Add support for movies in _json_ld
+* [postprocessor/ffmpeg] Embed subtitles with non-standard language codes
+ (#18765)
++ [utils] Add language codes replaced in 1989 revision of ISO 639
+ to ISO639Utils (#18765)
+
+Extractors
+* [youtube] Extract live HLS URL from player response (#18799)
++ [outsidetv] Add support for outsidetv.com (#18774)
+* [jwplatform] Use JW Platform Delivery API V2 and add support for more URLs
++ [fox] Add support for National Geographic (#17985, #15333, #14698)
++ [playplustv] Add support for playplus.tv (#18789)
+* [globo] Set GLBID cookie manually (#17346)
++ [gaia] Add support for gaia.com (#14605)
+* [youporn] Fix title and description extraction (#18748)
++ [hungama] Add support for hungama.com (#17402, #18771)
+* [dtube] Fix extraction (#18741)
+* [tvnow] Fix and rework extractors and prepare for a switch to the new API
+ (#17245, #18499)
+* [carambatv:page] Fix extraction (#18739)
+
+
+version 2019.01.02
+
+Extractors
+* [discovery] Use geo verification headers (#17838)
++ [packtpub] Add support for subscription.packtpub.com (#18718)
+* [yourporn] Fix extraction (#18583)
++ [acast:channel] Add support for play.acast.com (#18587)
++ [extractors] Add missing age limits (#18621)
++ [rmcdecouverte] Add support for live stream
+* [rmcdecouverte] Bypass geo restriction
+* [rmcdecouverte] Update URL regular expression (#18595, #18697)
+* [manyvids] Fix extraction (#18604, #18614)
+* [bitchute] Fix extraction (#18567)
+
+
+version 2018.12.31
+
+Extractors
++ [bbc] Add support for another embed pattern (#18643)
++ [npo:live] Add support for npostart.nl (#18644)
+* [beeg] Fix extraction (#18610, #18626)
+* [youtube] Unescape HTML for series (#18641)
++ [youtube] Extract more format metadata
+* [youtube] Detect DRM protected videos (#1774)
+* [youtube] Relax HTML5 player regular expressions (#18465, #18466)
+* [youtube] Extend HTML5 player regular expression (#17516)
++ [liveleak] Add support for another embed type and restore original
+ format extraction
++ [crackle] Extract ISM and HTTP formats
++ [twitter] Pass Referer with card request (#18579)
+* [mediasite] Extend URL regular expression (#18558)
++ [lecturio] Add support for lecturio.de (#18562)
++ [discovery] Add support for Scripps Networks watch domains (#17947)
+
+
+version 2018.12.17
+
+Extractors
+* [ard:beta] Improve geo restricted videos extraction
+* [ard:beta] Fix subtitles extraction
+* [ard:beta] Improve extraction robustness
+* [ard:beta] Relax URL regular expression (#18441)
+* [acast] Add support for embed.acast.com and play.acast.com (#18483)
+* [iprima] Relax URL regular expression (#18515, #18540)
+* [vrv] Fix initial state extraction (#18553)
+* [youtube] Fix mark watched (#18546)
++ [safari] Add support for learning.oreilly.com (#18510)
+* [youtube] Fix multifeed extraction (#18531)
+* [lecturio] Improve subtitles extraction (#18488)
+* [uol] Fix format URL extraction (#18480)
++ [ard:mediathek] Add support for classic.ardmediathek.de (#18473)
+
+
+version 2018.12.09
+
+Core
+* [YoutubeDL] Keep session cookies in cookie file between runs
+* [YoutubeDL] Recognize session cookies with expired set to 0 (#12929)
+
+Extractors
++ [teachable] Add support for teachable platform sites (#5451, #18150, #18272)
++ [aenetworks] Add support for historyvault.com (#18460)
+* [imgur] Improve gallery and album detection and extraction (#9133, #16577,
+ #17223, #18404)
+* [iprima] Relax URL regular expression (#18453)
+* [hotstar] Fix video data extraction (#18386)
+* [ard:mediathek] Fix title and description extraction (#18349, #18371)
+* [xvideos] Switch to HTTPS (#18422, #18427)
++ [lecturio] Add support for lecturio.com (#18405)
++ [nrktv:series] Add support for extra materials
+* [nrktv:season,series] Fix extraction (#17159, #17258)
+* [nrktv] Relax URL regular expression (#18304, #18387)
+* [yourporn] Fix extraction (#18424, #18425)
+* [tbs] Fix info extraction (#18403)
++ [gamespot] Add support for review URLs
+
+
+version 2018.12.03
+
+Core
+* [utils] Fix random_birthday to generate existing dates only (#18284)
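+
+The fix boils down to sampling an offset within a real date range rather
+than drawing day and month independently (which could yield e.g. February
+30); a sketch:
+
+    import datetime
+    import random
+
+    def random_birthday(year_field, month_field, day_field):
+        # Sample a calendar-valid date between two fixed endpoints
+        start_date = datetime.date(1950, 1, 1)
+        end_date = datetime.date(1995, 12, 31)
+        offset = random.randint(0, (end_date - start_date).days)
+        random_date = start_date + datetime.timedelta(offset)
+        return {
+            year_field: str(random_date.year),
+            month_field: str(random_date.month),
+            day_field: str(random_date.day),
+        }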
+
+Extractors
++ [tiktok] Add support for tiktok.com (#18108, #18135)
+* [pornhub] Use actual URL host for requests (#18359)
+* [lynda] Fix authentication (#18158, #18217)
+* [gfycat] Update API endpoint (#18333, #18343)
++ [hotstar] Add support for alternative app state layout (#18320)
+* [azmedien] Fix extraction (#18334, #18336)
++ [vimeo] Add support for VHX (Vimeo OTT) (#14835)
+* [joj] Fix extraction (#18280, #18281)
++ [wistia] Add support for fast.wistia.com (#18287)
+
+
+version 2018.11.23
+
+Core
++ [setup.py] Add more relevant classifiers
+
+Extractors
+* [mixcloud] Fallback to hardcoded decryption key (#18016)
+* [nbc:news] Fix article extraction (#16194)
+* [foxsports] Fix extraction (#17543)
+* [loc] Relax regular expression and improve formats extraction
++ [ciscolive] Add support for ciscolive.cisco.com (#17984)
+* [nzz] Relax kaltura regex (#18228)
+* [sixplay] Fix formats extraction
+* [bitchute] Improve title extraction
+* [kaltura] Limit requested MediaEntry fields
++ [americastestkitchen] Add support for zype embeds (#18225)
++ [pornhub] Add pornhub.net alias
+* [nova:embed] Fix extraction (#18222)
+
+
+version 2018.11.18
+
+Extractors
++ [wwe] Extract subtitles
++ [wwe] Add support for playlists (#14781)
++ [wwe] Add support for wwe.com (#14781, #17450)
+* [vk] Detect geo restriction (#17767)
+* [openload] Use original host during extraction (#18211)
+* [atvat] Fix extraction (#18041)
++ [rte] Add support for new API endpoint (#18206)
+* [tnaflixnetwork:embed] Fix extraction (#18205)
+* [picarto] Use API and add token support (#16518)
++ [zype] Add support for player.zype.com (#18143)
+* [vivo] Fix extraction (#18139)
+* [ruutu] Update API endpoint (#18138)
+
+
+version 2018.11.07
+
+Extractors
++ [youtube] Add another JS signature function name regex (#18091, #18093,
+ #18094)
+* [facebook] Fix tahoe request (#17171)
+* [cliphunter] Fix extraction (#18083)
++ [youtube:playlist] Add support for invidio.us (#18077)
+* [zattoo] Arrange API hosts for derived extractors (#18035)
++ [youtube] Add fallback metadata extraction from videoDetails (#18052)
+
+
+version 2018.11.03
+
+Core
+* [extractor/common] Ensure response handle is not prematurely closed before
+ it can be read if it matches expected_status (#17195, #17846, #17447)
+
+Extractors
+* [laola1tv:embed] Set correct stream access URL scheme (#16341)
++ [ehftv] Add support for ehftv.com (#15408)
+* [azmedien] Adapt to major site redesign (#17745, #17746)
++ [twitcasting] Add support for twitcasting.tv (#17981)
+* [orf:tvthek] Fix extraction (#17737, #17956, #18024)
++ [openload] Add support for oload.fun (#18045)
+* [njpwworld] Fix authentication (#17427)
++ [linkedin:learning] Add support for linkedin.com/learning (#13545)
+* [theplatform] Improve error detection (#13222)
+* [cnbc] Simplify extraction (#14280, #17110)
++ [cnbc] Add support for new URL schema (#14193)
+* [aparat] Improve extraction and extract more metadata (#17445, #18008)
+* [aparat] Fix extraction
+
+
+version 2018.10.29
+
+Core
++ [extractor/common] Add validation for JSON-LD URLs
+
+Extractors
++ [sportbox] Add support for matchtv.ru
+* [sportbox] Fix extraction (#17978)
+* [screencast] Fix extraction (#14590, #14617, #17990)
++ [openload] Add support for oload.icu
++ [ivi] Add support for ivi.tv
+* [crunchyroll] Improve extraction robustness (#17991)
+* [dailymail] Fix formats extraction (#17976)
+* [viewster] Reduce format requests
+* [cwtv] Handle API errors (#17905)
++ [rutube] Use geo verification headers (#17897)
++ [brightcove:legacy] Add fallbacks to brightcove:new (#13912)
+- [tv3] Remove extractor (#10461, #15339)
+* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894)
++ [openload] Add support for oload.cc (#17823)
++ [patreon] Extract post_file URL (#17792)
+* [patreon] Fix extraction (#14502, #10471)
+
+
+version 2018.10.05
+
+Extractors
+* [pluralsight] Improve authentication (#17762)
+* [dailymotion] Fix extraction (#17699)
+* [crunchyroll] Switch to HTTPS for RpcApi (#17749)
++ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705)
+* [philharmoniedeparis] Fix extraction (#17705)
++ [jamendo] Add support for licensing.jamendo.com (#17724)
++ [openload] Add support for oload.cloud (#17710)
+* [pluralsight] Fix subtitles extraction (#17726, #17728)
++ [vimeo] Add another config regular expression (#17690)
+* [spike] Fix Paramount Network extraction (#17677)
+* [hotstar] Fix extraction (#14694, #14931, #17637)
+
+
+version 2018.09.26
+
+Extractors
+* [pluralsight] Fix subtitles extraction (#17671)
+* [mediaset] Improve embed support (#17668)
++ [youtube] Add support for invidio.us (#17613)
++ [zattoo] Add support for more zattoo platform sites
+* [zattoo] Fix extraction (#17175, #17542)
+
+
+version 2018.09.18
+
+Core
++ [extractor/common] Introduce channel meta fields
+
+Extractors
+* [adobepass] Don't pollute default headers dict
+* [udemy] Don't pollute default headers dict
+* [twitch] Don't pollute default headers dict
+* [youtube] Don't pollute default query dict (#17593)
+* [crunchyroll] Prefer hardsubless formats and formats in locale language
+* [vrv] Make format ids deterministic
+* [vimeo] Fix ondemand playlist extraction (#14591)
++ [pornhub] Extract upload date (#17574)
++ [porntube] Extract channel meta fields
++ [vimeo] Extract channel meta fields
++ [youtube] Extract channel meta fields (#9676, #12939)
+* [porntube] Fix extraction (#17541)
+* [asiancrush] Fix extraction (#15630)
++ [twitch:clips] Extend URL regular expression (#17559)
++ [vzaar] Add support for HLS
+* [tube8] Fix metadata extraction (#17520)
+* [eporner] Extract JSON-LD (#17519)
+
+
+version 2018.09.10
+
+Core
++ [utils] Properly recognize AV1 codec (#17506)
+
+Extractors
++ [iprima] Add support for prima.iprima.cz (#17514)
++ [tele5] Add support for tele5.de (#7805, #7922, #17331, #17414)
+* [nbc] Fix extraction of percent encoded URLs (#17374)
+
+
+version 2018.09.08
+
+Extractors
+* [youtube] Fix extraction (#17457, #17464)
++ [pornhub:uservideos] Add support for new URLs (#17388)
+* [iprima] Confirm adult check (#17437)
+* [slideslive] Make check for video service name case-insensitive (#17429)
+* [radiojavan] Fix extraction (#17151)
+* [generic] Skip unsuccessful jwplayer extraction (#16735)
+
+
+version 2018.09.01
+
+Core
+* [utils] Skip remote IP addresses not matching the source address' IP
+ version when creating a connection (#13422, #17362)
+
+Extractors
++ [ard] Add support for one.ard.de (#17397)
+* [niconico] Fix extraction on python3 (#17393, #17407)
+* [ard] Extract f4m formats
+* [crunchyroll] Parse vilos media data (#17343)
++ [ard] Add support for Beta ARD Mediathek
++ [bandcamp] Extract more metadata (#13197)
+* [internazionale] Fix extraction of non-available-abroad videos (#17386)
+
+
+version 2018.08.28
+
+Extractors
++ [youtube:playlist] Add support for music album playlists (OLAK5uy_ prefix)
+ (#17361)
+* [bitchute] Fix extraction by passing custom User-Agent (#17360)
+* [webofstories:playlist] Fix extraction (#16914)
++ [tvplayhome] Add support for new tvplay URLs (#17344)
++ [generic] Allow relative src for videojs embeds (#17324)
++ [xfileshare] Add support for vidto.se (#17317)
++ [vidzi] Add support for vidzi.nu (#17316)
++ [nova:embed] Add support for media.cms.nova.cz (#17282)
+
+
+version 2018.08.22
+
+Core
+* [utils] Use pure browser header for User-Agent (#17236)
+
+Extractors
++ [kinopoisk] Add support for kinopoisk.ru (#17283)
++ [yourporn] Add support for yourporn.sexy (#17298)
++ [go] Add support for disneynow.go.com (#16299, #17264)
++ [6play] Add support for play.rtl.hr (#17249)
+* [anvato] Fallback to generic API key for access-key-to-API-key lookup
+ (#16788, #17254)
+* [lci] Fix extraction (#17274)
+* [bbccouk] Extend id URL regular expression (#17270)
+* [cwtv] Fix extraction (#17256)
+* [nova] Fix extraction (#17241)
++ [generic] Add support for expressen embeds
+* [raywenderlich] Adapt to site redesign (#17225)
++ [redbulltv] Add support for redbull.com tv URLs (#17218)
++ [bitchute] Add support for bitchute.com (#14052)
++ [clyp] Add support for token protected media (#17184)
+* [imdb] Fix extension extraction (#17167)
+
+
+version 2018.08.04
+
+Extractors
+* [funk:channel] Improve byChannelAlias extraction (#17142)
+* [twitch] Fix authentication (#17024, #17126)
+* [twitch:vod] Improve URL regular expression (#17135)
+* [watchbox] Fix extraction (#17107)
+* [pbs] Fix extraction (#17109)
+* [theplatform] Relax URL regular expression (#16181, #17097)
++ [viqeo] Add support for viqeo.tv (#17066)
+
+
+version 2018.07.29
+
+Extractors
+* [crunchyroll:playlist] Restrict URL regular expression (#17069, #17076)
++ [pornhub] Add support for subtitles (#16924, #17088)
+* [ceskatelevize] Use https for API call (#16997, #16999)
+* [dailymotion:playlist] Fix extraction (#16894)
+* [ted] Improve extraction
+* [ted] Fix extraction for videos without nativeDownloads (#16756, #17085)
+* [telecinco] Fix extraction (#17080)
+* [mitele] Reduce number of requests
+* [rai] Return non HTTP relinker URL intact (#17055)
+* [vk] Fix extraction for inline only videos (#16923)
+* [streamcloud] Fix extraction (#17054)
+* [facebook] Fix tahoe player extraction with authentication (#16655)
++ [puhutv] Add support for puhutv.com (#12712, #16010, #16269)
+
+
+version 2018.07.21
+
+Core
++ [utils] Introduce url_or_none (sketch below)
+* [utils] Allow JSONP without function name (#17028)
++ [extractor/common] Extract DASH and MSS formats from SMIL manifests
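+
+url_or_none validates that a value looks like an absolute or
+protocol-relative URL and returns None otherwise; a close sketch:
+
+    import re
+
+    def url_or_none(url):
+        # 'https://x.test/v' -> 'https://x.test/v';
+        # None, '' and non-URL strings -> None
+        if not url or not isinstance(url, str):
+            return None
+        url = url.strip()
+        return url if re.match(
+            r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None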
+
+Extractors
++ [bbc] Add support for BBC Radio Play pages (#17022)
+* [iwara] Fix download URLs (#17026)
+* [vrtnu] Relax title extraction and extract JSON-LD (#17018)
++ [viu] Pass Referer and Origin headers and area id (#16992)
++ [vimeo] Add another config regular expression (#17013)
++ [facebook] Extract view count (#16942)
+* [dailymotion] Improve description extraction (#16984)
+* [slutload] Fix and improve extraction (#17001)
+* [mediaset] Fix extraction (#16977)
++ [theplatform] Add support for theplatform TLD customization (#16977)
+* [imgur] Relax URL regular expression (#16987)
+* [pornhub] Improve extraction and extract all formats (#12166, #15891, #16262,
+ #16959)
+
+
+version 2018.07.10
+
+Core
+* [utils] Share JSON-LD regular expression
+* [downloader/dash] Improve error handling (#16927)
+
+Extractors
++ [nrktv] Add support for new season and serie URL schema
++ [nrktv] Add support for new episode URL schema (#16909)
++ [frontendmasters] Add support for frontendmasters.com (#3661, #16328)
+* [funk] Fix extraction (#16918)
+* [watchbox] Fix extraction (#16904)
+* [dplayit] Sort formats
+* [dplayit] Fix extraction (#16901)
+* [youtube] Improve login error handling (#13822)
+
+
+version 2018.07.04
+
+Core
+* [extractor/common] Properly escape % in MPD templates (#16867)
+* [extractor/common] Use source URL as Referer for HTML5 entries (#16849)
+* Prefer ffmpeg over avconv by default (#8622)
+
+Extractors
+* [pluralsight] Switch to graphql (#16889, #16895, #16896, #16899)
+* [lynda] Simplify login and improve error capturing (#16891)
++ [go90] Add support for embed URLs (#16873)
+* [go90] Detect geo restriction error and pass geo verification headers
+ (#16874)
+* [vlive] Fix live streams extraction (#16871)
+* [npo] Fix typo (#16872)
++ [mediaset] Add support for new videos and extract all formats (#16568)
+* [dctptv] Restore extraction based on REST API (#16850)
+* [svt] Improve extraction and add support for pages (#16802)
+* [porncom] Fix extraction (#16808)
+
+
+version 2018.06.25
+
+Extractors
+* [joj] Relax URL regular expression (#16771)
+* [brightcove] Workaround sonyliv DRM protected videos (#16807)
+* [motherless] Fix extraction (#16786)
+* [itv] Make SOAP request non fatal and extract metadata from webpage (#16780)
+- [foxnews:insider] Remove extractor (#15810)
++ [foxnews] Add support for iframe embeds (#15810, #16711)
+
+
+version 2018.06.19
+
+Core
++ [extractor/common] Introduce expected_status in _download_* methods
+ for conveniently accepting HTTP responses that fail with non-2xx status
+ codes (see the usage sketch below)
++ [compat] Introduce compat_integer_types
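+
+ A hedged usage sketch of expected_status inside an extractor (the URL and
+ video_id are illustrative, and the snippet is not runnable standalone):
+
+     # Treat a 500 response as a valid payload, since some APIs return
+     # error details as a JSON body rather than a 2xx response.
+     data = self._download_json(
+         'https://example.com/api/video', video_id,
+         expected_status=500)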
+
+Extractors
+* [peertube] Improve generic support (#16733)
++ [6play] Use geo verification headers
+* [rtbf] Fix extraction for python 3.2
+* [vgtv] Improve HLS formats extraction
++ [vgtv] Add support for www.aftonbladet.se/tv URLs
+* [bbccouk] Use expected_status
+* [markiza] Expect 500 HTTP status code
+* [tvnow] Try all clear manifest URLs (#15361)
+
+
+version 2018.06.18
+
+Core
+* [downloader/rtmp] Fix downloading in verbose mode (#16736)
+
+Extractors
++ [markiza] Add support for markiza.sk (#16750)
+* [wat] Try all supported adaptive URLs
++ [6play] Add support for rtlplay.be and extract hd usp formats
++ [rtbf] Add support for audio and live streams (#9638, #11923)
++ [rtbf] Extract HLS, DASH and all HTTP formats
++ [rtbf] Extract subtitles
++ [rtbf] Fixup specific HTTP URLs (#16101)
++ [expressen] Add support for expressen.se
+* [vidzi] Fix extraction (#16678)
+* [pbs] Improve extraction (#16623, #16684)
+* [bilibili] Restrict cid regular expression (#16638, #16734)
+
+
+version 2018.06.14
+
+Core
+* [downloader/http] Fix retry on error when streaming to stdout (#16699)
+
+Extractors
++ [discoverynetworks] Add support for disco-api videos (#16724)
++ [dailymotion] Add support for password protected videos (#9789)
++ [abc:iview] Add support for livestreams (#12354)
+* [abc:iview] Fix extraction (#16704)
++ [crackle] Add support for sonycrackle.com (#16698)
++ [tvnet] Add support for tvnet.gov.vn (#15462)
+* [nrk] Update API hosts and try all previously known ones (#16690)
+* [wimp] Fix Youtube embeds extraction
+
+
+version 2018.06.11
+
+Extractors
+* [npo] Extend URL regular expression and add support for npostart.nl (#16682)
++ [inc] Add support for another embed schema (#16666)
+* [tv4] Fix format extraction (#16650)
++ [nexx] Add support for free cdn (#16538)
++ [pbs] Add another cove id pattern (#15373)
++ [rbmaradio] Add support for 192k format (#16631)
+
+
+version 2018.06.04
+
+Extractors
++ [camtube] Add support for camtube.co
++ [twitter:card] Extract guest token (#16609)
++ [chaturbate] Use geo verification headers
++ [bbc] Add support for bbcthree (#16612)
+* [youtube] Move metadata extraction after video availability check
++ [youtube] Extract track and artist
++ [safari] Add support for new URL schema (#16614)
+* [adn] Fix extraction
+
+
+version 2018.06.02
+
+Core
+* [utils] Improve determine_ext
+
+Extractors
++ [facebook] Add support for tahoe player videos (#15441, #16554)
+* [cbc] Improve extraction (#16583, #16593)
+* [openload] Improve ext extraction (#16595)
++ [twitter:card] Add support for another endpoint (#16586)
++ [openload] Add support for oload.win and oload.download (#16592)
+* [audimedia] Fix extraction (#15309)
++ [francetv] Add support for sport.francetvinfo.fr (#15645)
+* [mlb] Improve extraction (#16587)
+- [nhl] Remove old extractors
+* [rbmaradio] Check formats availability (#16585)
+
+
+version 2018.05.30
+
+Core
+* [downloader/rtmp] Generalize download messages and report time elapsed
+ on finish
+* [downloader/rtmp] Gracefully handle live streams interrupted by user
+
+Extractors
+* [teamcoco] Fix extraction for full episodes (#16573)
+* [spiegel] Fix info extraction (#16538)
++ [apa] Add support for apa.at (#15041, #15672)
++ [bellmedia] Add support for bnnbloomberg.ca (#16560)
++ [9c9media] Extract MPD formats and subtitles
+* [cammodels] Use geo verification headers
++ [ufctv] Add support for authentication (#16542)
++ [cammodels] Add support for cammodels.com (#14499)
+* [utils] Fix style id extraction for namespaced id attribute in dfxp2srt
+ (#16551)
+* [soundcloud] Detect format extension (#16549)
+* [cbc] Fix playlist title extraction (#16502)
++ [tumblr] Detect and report sensitive media (#13829)
++ [tumblr] Add support for authentication (#15133)
+
+
+version 2018.05.26
+
+Core
+* [utils] Improve parse_age_limit
+
+Extractors
+* [audiomack] Stringify video id (#15310)
+* [izlesene] Fix extraction (#16233, #16271, #16407)
++ [indavideo] Add support for generic embeds (#11989)
+* [indavideo] Fix extraction (#11221)
+* [indavideo] Sign download URLs (#16174)
++ [peertube] Add support for PeerTube based sites (#16301, #16329)
+* [imgur] Fix extraction (#16537)
++ [hidive] Add support for authentication (#16534)
++ [nbc] Add support for stream.nbcsports.com (#13911)
++ [viewlift] Add support for hoichoi.tv (#16536)
+* [go90] Extract age limit and detect DRM protection (#10127)
+* [viewlift] Fix extraction for snagfilms.com (#15766)
+* [globo] Improve extraction (#4189)
+ * Add support for authentication
+ * Simplify URL signing
+ * Extract DASH and MSS formats
+* [leeco] Fix extraction (#16464)
+* [teamcoco] Add fallback for format extraction (#16484)
+* [teamcoco] Improve URL regular expression (#16484)
+* [imdb] Improve extraction (#4085, #14557)
+
+
+version 2018.05.18
+
+Extractors
+* [vimeo:likes] Relax URL regular expression and fix single page likes
+ extraction (#16475)
+* [pluralsight] Fix clip id extraction (#16460)
++ [mychannels] Add support for mychannels.com (#15334)
+- [moniker] Remove extractor (#15336)
+* [pbs] Fix embed data extraction (#16474)
++ [mtv] Add support for paramountnetwork.com and bellator.com (#15418)
+* [youtube] Fix hd720 format position
+* [dailymotion] Remove fragment part from m3u8 URLs (#8915)
+* [3sat] Improve extraction (#15350)
+ * Extract all formats
+ * Extract more format metadata
+ * Improve format sorting
+ * Use hls native downloader
+ * Detect and bypass geo-restriction
++ [dtube] Add support for d.tube (#15201)
+* [options] Fix typo (#16450)
+* [youtube] Improve format filesize extraction (#16453)
+* [youtube] Make uploader extraction non fatal (#16444)
+* [youtube] Fix extraction for embed restricted live streams (#16433)
+* [nbc] Improve info extraction (#16440)
+* [twitch:clips] Fix extraction (#16429)
+* [redditr] Relax URL regular expression (#16426, #16427)
+* [mixcloud] Bypass throttling for HTTP formats (#12579, #16424)
++ [nick] Add support for nickjr.de (#13230)
+* [teamcoco] Fix extraction (#16374)
+
+
+version 2018.05.09
+
+Core
+* [YoutubeDL] Ensure ext exists for automatic captions
+* Introduce --geo-bypass-ip-block
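+
+ Usage example (the IP block, given in CIDR notation, is illustrative):
+
+     hypervideo --geo-bypass-ip-block 91.198.174.0/24 'https://example.com/v'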
+
+Extractors
++ [udemy] Extract asset captions
++ [udemy] Extract stream URLs (#16372)
++ [businessinsider] Add support for businessinsider.com (#16387, #16388, #16389)
++ [cloudflarestream] Add support for cloudflarestream.com (#16375)
+* [watchbox] Fix extraction (#16356)
+* [discovery] Extract Affiliate/Anonymous Auth Token from cookies (#14954)
++ [itv:btcc] Add support for itv.com/btcc (#16139)
+* [tunein] Use live title for live streams (#16347)
+* [itv] Improve extraction (#16253)
+
+
+version 2018.05.01
+
+Core
+* [downloader/fragment] Restart download if .ytdl file is corrupt (#16312)
++ [extractor/common] Extract interaction statistic
++ [utils] Add merge_dicts (see the sketch below)
++ [extractor/common] Add _download_json_handle
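+
+ A hedged sketch of a merge_dicts-style helper (simplified: str instead of
+ the project's compat_str): earlier dicts win, except that a later
+ non-empty string may replace an earlier empty one.
+
+     def merge_dicts(*dicts):
+         merged = {}
+         for a_dict in dicts:
+             for k, v in a_dict.items():
+                 if v is None:
+                     continue
+                 if (k not in merged
+                         or (isinstance(v, str) and v
+                             and isinstance(merged[k], str)
+                             and not merged[k])):
+                     merged[k] = v
+         return merged
+
+     assert merge_dicts({'title': ''}, {'title': 'x'}) == {'title': 'x'}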
+
+Extractors
+* [kaltura] Improve iframe embeds detection (#16337)
++ [udemy] Extract outputs renditions (#16289, #16291, #16320, #16321, #16334,
+ #16335)
++ [zattoo] Add support for zattoo.com and mobiltv.quickline.com (#14668, #14676)
+* [yandexmusic] Convert release_year to int
+* [udemy] Override _download_webpage_handle instead of _download_webpage
+* [xiami] Override _download_webpage_handle instead of _download_webpage
+* [yandexmusic] Override _download_webpage_handle instead of _download_webpage
+* [youtube] Correctly disable polymer on all requests (#16323, #16326)
+* [generic] Prefer enclosures over links in RSS feeds (#16189)
++ [redditr] Add support for old.reddit.com URLs (#16274)
+* [nrktv] Update API host (#16324)
++ [imdb] Extract all formats (#16249)
++ [vimeo] Extract JSON-LD (#16295)
+* [funk:channel] Improve extraction (#16285)
+
+
+version 2018.04.25
+
+Core
+* [utils] Fix match_str for boolean meta fields
++ [Makefile] Add support for pandoc 2 and disable smart extension (#16251)
+* [YoutubeDL] Fix typo in media extension compatibility checker (#16215)
+
+Extractors
++ [openload] Recognize IPv6 stream URLs (#16136, #16137, #16205, #16246,
+ #16250)
++ [twitch] Extract is_live according to status (#16259)
+* [pornflip] Relax URL regular expression (#16258)
+- [etonline] Remove extractor (#16256)
+* [breakcom] Fix extraction (#16254)
++ [youtube] Add ability to authenticate with cookies
+* [youtube:feed] Implement lazy playlist extraction (#10184)
++ [svt] Add support for TV channel live streams (#15279, #15809)
+* [ccma] Fix video extraction (#15931)
+* [rentv] Fix extraction (#15227)
++ [nick] Add support for nickjr.nl (#16230)
+* [extremetube] Fix metadata extraction
++ [keezmovies] Add support for generic embeds (#16134, #16154)
+* [nexx] Extract new azure URLs (#16223)
+* [cbssports] Fix extraction (#16217)
+* [kaltura] Improve embeds detection (#16201)
+* [instagram:user] Fix extraction (#16119)
+* [cbs] Skip DRM asset types (#16104)
+
+
+version 2018.04.16
+
+Extractors
+* [smotri:broadcast] Fix extraction (#16180)
++ [picarto] Add support for picarto.tv (#6205, #12514, #15276, #15551)
+* [vine:user] Fix extraction (#15514, #16190)
+* [pornhub] Relax URL regular expression (#16165)
+* [cbc:watch] Re-acquire device token when expired (#16160)
++ [fxnetworks] Add support for https theplatform URLs (#16125, #16157)
++ [instagram:user] Add request signing (#16119)
++ [twitch] Add support for mobile URLs (#16146)
+
+
+version 2018.04.09
+
+Core
+* [YoutubeDL] Do not save/restore console title while simulate (#16103)
+* [extractor/common] Relax JSON-LD context check (#16006)
+
+Extractors
++ [generic] Add support for tube8 embeds
++ [generic] Add support for share-videos.se embeds (#16089, #16115)
+* [odnoklassniki] Extend URL regular expression (#16081)
+* [steam] Bypass mature content check (#16113)
++ [acast] Extract more metadata
+* [acast] Fix extraction (#16118)
+* [instagram:user] Fix extraction (#16119)
+* [drtuber] Fix title extraction (#16107, #16108)
+* [liveleak] Extend URL regular expression (#16117)
++ [openload] Add support for oload.xyz
+* [openload] Relax stream URL regular expression
+* [openload] Fix extraction (#16099)
++ [svtplay:series] Add support for season URLs
++ [svtplay:series] Add support for series (#11130, #16059)
+
+
+version 2018.04.03
+
+Extractors
++ [tvnow] Add support for shows (#15837)
+* [dramafever] Fix authentication (#16067)
+* [afreecatv] Use partial view only when necessary (#14450)
++ [afreecatv] Add support for authentication (#14450)
++ [nationalgeographic] Add support for new URL schema (#16001, #16054)
+* [xvideos] Fix thumbnail extraction (#15978, #15979)
+* [medialaan] Fix vod id (#16038)
++ [openload] Add support for oload.site (#16039)
+* [naver] Fix extraction (#16029)
+* [dramafever] Partially switch to API v5 (#16026)
+* [abc:iview] Unescape title and series meta fields (#15994)
+* [videa] Extend URL regular expression (#16003)
+
+
+version 2018.03.26.1
+
+Core
++ [downloader/external] Add elapsed time to progress hook (#10876; see the
+ hook sketch below)
+* [downloader/external,fragment] Fix download finalization when writing file
+ to stdout (#10809, #10876, #15799)
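+
+ A hedged usage sketch of reading the new elapsed value from a progress
+ hook (field names follow the documented progress-hook dict; wiring
+ ydl_opts into YoutubeDL is omitted):
+
+     def hook(d):
+         if d['status'] == 'finished':
+             print('downloaded in %.1f s' % d.get('elapsed', 0))
+
+     ydl_opts = {'progress_hooks': [hook]}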
+
+Extractors
+* [vrv] Fix extraction on python2 (#15928)
+* [afreecatv] Update referrer (#15947)
++ [24video] Add support for 24video.sexy (#15973)
+* [crackle] Bypass geo restriction
+* [crackle] Fix extraction (#15969)
++ [lenta] Add support for lenta.ru (#15953)
++ [instagram:user] Add pagination (#15934)
+* [youku] Update ccode (#15939)
+* [libsyn] Adapt to new page structure
+
+
+version 2018.03.20
+
+Core
+* [extractor/common] Improve thumbnail extraction for HTML5 entries
+* Generalize XML manifest processing code and improve XSPF parsing
++ [extractor/common] Add _download_xml_handle
++ [extractor/common] Add support for relative URIs in _parse_xspf (#15794)
+
+Extractors
++ [7plus] Extract series metadata (#15862, #15906)
+* [9now] Bypass geo restriction (#15920)
+* [cbs] Skip unavailable assets (#13490, #13506, #15776)
++ [canalc2] Add support for HTML5 videos (#15916, #15919)
++ [ceskatelevize] Add support for iframe embeds (#15918)
++ [prosiebensat1] Add support for galileo.tv (#15894)
++ [generic] Add support for xfileshare embeds (#15879)
+* [bilibili] Switch to v2 playurl API
+* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863)
+* [heise] Improve extraction (#15496, #15784, #15026)
+* [instagram] Fix user videos extraction (#15858)
+
+
+version 2018.03.14
+
+Extractors
+* [soundcloud] Update client id (#15866)
++ [tennistv] Add support for tennistv.com
++ [line] Add support for tv.line.me (#9427)
+* [xnxx] Fix extraction (#15817)
+* [njpwworld] Fix authentication (#15815)
+
+
+version 2018.03.10
+
+Core
+* [downloader/hls] Skip uplynk ad fragments (#15748)
+
+Extractors
+* [pornhub] Don't override session cookies (#15697)
++ [raywenderlich] Add support for videos.raywenderlich.com (#15251)
+* [funk] Fix extraction and rework extractors (#15792)
+* [nexx] Restore reverse engineered approach
++ [heise] Add support for kaltura embeds (#14961, #15728)
++ [tvnow] Extract series metadata (#15774)
+* [ruutu] Continue formats extraction on NOT-USED URLs (#15775)
+* [vrtnu] Use redirect URL for building video JSON URL (#15767, #15769)
+* [vimeo] Modernize login code and improve error messaging
+* [archiveorg] Fix extraction (#15770, #15772)
++ [hidive] Add support for hidive.com (#15494)
+* [afreecatv] Detect deleted videos
+* [afreecatv] Fix extraction (#15755)
+* [vice] Fix extraction and rework extractors (#11101, #13019, #13622, #13778)
++ [vidzi] Add support for vidzi.si (#15751)
+* [npo] Fix typo
+
+
+version 2018.03.03
+
+Core
++ [utils] Add parse_resolution (see the sketch below)
+* Revert "Respect --prefer-insecure while updating"
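+
+ A hedged sketch of a parse_resolution-style helper, reduced to the
+ WIDTHxHEIGHT case (the real helper recognizes more shapes):
+
+     import re
+
+     def parse_resolution(s):
+         m = re.search(r'\b(\d+)\s*[xX]\s*(\d+)\b', s or '')
+         if m:
+             return {'width': int(m.group(1)), 'height': int(m.group(2))}
+         return {}
+
+     assert parse_resolution('1920x1080') == {'width': 1920, 'height': 1080}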
+
+Extractors
++ [yapfiles] Add support for yapfiles.ru (#15726, #11085)
+* [spankbang] Fix formats extraction (#15727)
+* [adn] Fix extraction (#15716)
++ [toggle] Extract DASH and ISM formats (#15721)
++ [nickelodeon] Add support for nickelodeon.com.tr (#15706)
+* [npo] Validate and filter format URLs (#15709)
+
+
+version 2018.02.26
+
+Extractors
+* [udemy] Use custom User-Agent (#15571)
+
+
+version 2018.02.25
+
+Core
+* [postprocessor/embedthumbnail] Skip embedding when there aren't any
+ thumbnails (#12573)
+* [extractor/common] Improve jwplayer subtitles extraction (#15695)
+
+Extractors
++ [vidlii] Add support for vidlii.com (#14472, #14512, #14779)
++ [streamango] Capture and output error messages
+* [streamango] Fix extraction (#14160, #14256)
++ [telequebec] Add support for emissions (#14649, #14655)
++ [telequebec:live] Add support for live streams (#15688)
++ [mailru:music] Add support for mail.ru/music (#15618)
+* [aenetworks] Switch to akamai HLS formats (#15612)
+* [ytsearch] Fix flat title extraction (#11260, #15681)
+
+
+version 2018.02.22
+
+Core
++ [utils] Fixup some common URL typos in sanitize_url (#15649; see the
+ sketch below)
+* Respect --prefer-insecure while updating (#15497)
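+
+ A hedged sketch of the kind of fixup sanitize_url performs; the typo
+ handled below ('httpss://') is one example, the real helper covers more:
+
+     import re
+
+     def sanitize_url(url):
+         if url.startswith('//'):
+             return 'http:%s' % url
+         return re.sub(r'^httpss://', 'https://', url)
+
+     assert sanitize_url('httpss://example.com') == 'https://example.com'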
+
+Extractors
+* [vidio] Fix HLS URL extraction (#15675)
++ [nexx] Add support for arc.nexx.cloud URLs
+* [nexx] Switch to arc API (#15652)
+* [redtube] Fix duration extraction (#15659)
++ [sonyliv] Respect referrer (#15648)
++ [brightcove:new] Use referrer for formats' HTTP headers
++ [cbc] Add support for olympics.cbc.ca (#15535)
++ [fusion] Add support for fusion.tv (#15628)
+* [npo] Improve quality metadata extraction
+* [npo] Relax URL regular expression (#14987, #14994)
++ [npo] Capture and output error message
++ [pornhub] Add support for channels (#15613)
+* [youtube] Handle shared URLs with generic extractor (#14303)
+
+
+version 2018.02.11
+
+Core
++ [YoutubeDL] Add support for filesize_approx in format selector (#15550)
+
+Extractors
++ [francetv] Add support for live streams (#13689)
++ [francetv] Add support for zouzous.fr and ludo.fr (#10454, #13087, #13103,
+ #15012)
+* [francetv] Separate main extractor and rework others to delegate to it
+* [francetv] Improve manifest URL signing (#15536)
++ [francetv] Sign m3u8 manifest URLs (#15565)
++ [veoh] Add support for embed URLs (#15561)
+* [afreecatv] Fix extraction (#15556)
+* [periscope] Use accessVideoPublic endpoint (#15554)
+* [discovery] Fix auth request (#15542)
++ [6play] Extract subtitles (#15541)
+* [newgrounds] Fix metadata extraction (#15531)
++ [nbc] Add support for stream.nbcolympics.com (#10295)
+* [dvtv] Fix live streams extraction (#15442)
+
+
+version 2018.02.08
+
+Extractors
++ [myvi] Extend URL regular expression
++ [myvi:embed] Add support for myvi.tv embeds (#15521)
++ [prosiebensat1] Extend URL regular expression (#15520)
+* [pokemon] Relax URL regular expression and extend title extraction (#15518)
++ [gameinformer] Use geo verification headers
+* [la7] Fix extraction (#15501, #15502)
+* [gameinformer] Fix brightcove id extraction (#15416)
++ [afreecatv] Pass referrer to video info request (#15507)
++ [telebruxelles] Add support for live streams
+* [telebruxelles] Relax URL regular expression
+* [telebruxelles] Fix extraction (#15504)
+* [extractor/common] Respect secure schemes in _extract_wowza_formats
+
+
+version 2018.02.04
+
+Core
+* [downloader/http] Randomize HTTP chunk size
++ [downloader/http] Add ability to pass downloader options via info dict
+ (see the sketch below)
+* [downloader/http] Fix 302 infinite loops by not reusing requests
++ Document http_chunk_size
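+
+ A hedged illustration of attaching downloader options to a format via
+ the info dict (URL and ids are illustrative):
+
+     fmt = {
+         'url': 'https://example.com/video.mp4',
+         'format_id': 'http-720p',
+         'downloader_options': {
+             # ask the HTTP downloader to fetch in ~10 MiB chunks
+             'http_chunk_size': 10485760,
+         },
+     }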
+
+Extractors
++ [brightcove] Pass embed page URL as referrer (#15486)
++ [youtube] Enforce using chunked HTTP downloading for DASH formats
+
+
+version 2018.02.03
+
+Core
++ Introduce --http-chunk-size for chunk-based HTTP downloading
++ Add support for IronPython
+* [downloader/ism] Fix Python 3.2 support
+
+Extractors
+* [redbulltv] Fix extraction (#15481)
+* [redtube] Fix metadata extraction (#15472)
+* [pladform] Respect platform id and extract HLS formats (#15468)
+- [rtlnl] Remove progressive formats (#15459)
+* [6play] Do not modify asset URLs with a token (#15248)
+* [nationalgeographic] Relax URL regular expression
+* [dplay] Relax URL regular expression (#15458)
+* [cbsinteractive] Fix data extraction (#15451)
++ [amcnetworks] Add support for sundancetv.com (#9260)
+
+
+version 2018.01.27
+
+Core
+* [extractor/common] Improve _json_ld for articles
+* Switch codebase to use compat_b64decode
++ [compat] Add compat_b64decode
+
+Extractors
++ [seznamzpravy] Add support for seznam.cz and seznamzpravy.cz (#14102, #14616)
+* [dplay] Bypass geo restriction
++ [dplay] Add support for disco-api videos (#15396)
+* [youtube] Extract precise error messages (#15284)
+* [teachertube] Capture and output error message
+* [teachertube] Fix and relax thumbnail extraction (#15403)
++ [prosiebensat1] Add another clip id regular expression (#15378)
+* [tbs] Update tokenizer url (#15395)
+* [mixcloud] Use compat_b64decode (#15394)
+- [thesixtyone] Remove extractor (#15341)
+
+
+version 2018.01.21
+
+Core
+* [extractor/common] Improve jwplayer DASH formats extraction (#9242, #15187)
+* [utils] Improve scientific notation handling in js_to_json (#14789)
+
+Extractors
++ [southparkdk] Add support for southparkstudios.nu
++ [southpark] Add support for collections (#14803)
+* [franceinter] Fix upload date extraction (#14996)
++ [rtvs] Add support for rtvs.sk (#9242, #15187)
+* [restudy] Fix extraction and extend URL regular expression (#15347)
+* [youtube:live] Improve live detection (#15365)
++ [springboardplatform] Add support for springboardplatform.com
+* [prosiebensat1] Add another clip id regular expression (#15290)
+- [ringtv] Remove extractor (#15345)
+
+
+version 2018.01.18
+
+Extractors
+* [soundcloud] Update client id (#15306)
+- [kamcord] Remove extractor (#15322)
++ [spiegel] Add support for nexx videos (#15285)
+* [twitch] Fix authentication and error capture (#14090, #15264)
+* [vk] Detect more errors due to copyright complaints (#15259)
+
+
+version 2018.01.14
+
+Extractors
+* [youtube] Fix live streams extraction (#15202)
+* [wdr] Bypass geo restriction
+* [wdr] Rework extractors (#14598)
++ [wdr] Add support for wdrmaus.de/elefantenseite (#14598)
++ [gamestar] Add support for gamepro.de (#3384)
+* [viafree] Skip rtmp formats (#15232)
++ [pandoratv] Add support for mobile URLs (#12441)
++ [pandoratv] Add support for new URL format (#15131)
++ [ximalaya] Add support for ximalaya.com (#14687)
++ [digg] Add support for digg.com (#15214)
+* [limelight] Tolerate empty pc formats (#15150, #15151, #15207)
+* [ndr:embed:base] Make separate formats extraction non fatal (#15203)
++ [weibo] Add extractor (#15079)
++ [ok] Add support for live streams
+* [canalplus] Fix extraction (#15072)
+* [bilibili] Fix extraction (#15188)
+
+
+version 2018.01.07
+
+Core
+* [utils] Fix youtube-dl under PyPy3 on Windows
+* [YoutubeDL] Output python implementation in debug header
+
+Extractors
++ [jwplatform] Add support for multiple embeds (#15192)
+* [mitele] Fix extraction (#15186)
++ [motherless] Add support for groups (#15124)
+* [lynda] Relax URL regular expression (#15185)
+* [soundcloud] Fallback to avatar picture for thumbnail (#12878)
+* [youku] Fix list extraction (#15135)
+* [openload] Fix extraction (#15166)
+* [lynda] Skip invalid subtitles (#15159)
+* [twitch] Pass video id to url_result when extracting playlist (#15139)
+* [rtve.es:alacarta] Fix extraction of some new URLs
+* [acast] Fix extraction (#15147)
+
+
+version 2017.12.31
+
+Core
++ [extractor/common] Add container meta field for formats extracted
+ in _parse_mpd_formats (#13616; see the example below)
++ [downloader/hls] Use HTTP headers for key request
+* [common] Use AACL as the default fourcc when AudioTag is 255
+* [extractor/common] Fix extraction of DASH formats with the same
+ representation id (#15111)
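+
+ A hedged illustration of the new container meta field on a format parsed
+ from an MPD manifest (all values are illustrative):
+
+     fmt = {
+         'format_id': 'dash-video-1',
+         'ext': 'mp4',
+         'container': 'mp4_dash',
+         'vcodec': 'avc1.4d401f',
+         'acodec': 'none',
+     }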
+
+Extractors
++ [slutload] Add support for mobile URLs (#14806)
+* [abc:iview] Bypass geo restriction
+* [abc:iview] Fix extraction (#14711, #14782, #14838, #14917, #14963, #14985,
+ #15035, #15057, #15061, #15071, #15095, #15106)
+* [openload] Fix extraction (#15118)
+- [sandia] Remove extractor
+- [collegerama] Remove extractor
++ [mediasite] Add support for sites based on Mediasite Video Platform (#5428,
+ #11185, #14343)
++ [ufctv] Add support for ufc.tv (#14520)
+* [pluralsight] Fix missing first line of subtitles (#11118)
+* [openload] Fallback on f-page extraction (#14665, #14879)
+* [vimeo] Improve password protected videos extraction (#15114)
+* [aws] Fix canonical/signed headers generation on python 2 (#15102)
+
+
+version 2017.12.28
+
+Extractors
++ [internazionale] Add support for internazionale.it (#14973)
+* [playtvak] Relax video regular expression and make description optional
+ (#15037)
++ [filmweb] Add support for filmweb.no (#8773, #10368)
++ [23video] Add support for 23video.com
++ [espn] Add support for fivethirtyeight.com (#6864)
++ [umg:de] Add support for universal-music.de (#11582, #11584)
++ [espn] Add support for espnfc and extract more formats (#8053)
+* [youku] Update ccode (#14880)
++ [openload] Add support for oload.stream (#15070)
+* [youku] Fix list extraction (#15065)
+
+
+version 2017.12.23
+
+Core
+* [extractor/common] Move X-Forwarded-For setup code into _request_webpage
++ [YoutubeDL] Add support for playlist_uploader and playlist_uploader_id in
+ output template (#11427, #15018; see the example below)
++ [extractor/common] Introduce uploader, uploader_id and uploader_url
+ meta fields for playlists (#11427, #15018)
+* [downloader/fragment] Encode filename of fragment being removed (#15020)
++ [utils] Add another date format pattern (#14999)
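+
+ Output template example using the new playlist fields (PLAYLIST_URL is a
+ placeholder):
+
+     hypervideo -o '%(playlist_uploader)s/%(playlist)s/%(title)s.%(ext)s' \
+         PLAYLIST_URL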
+
+Extractors
++ [kaltura] Add another embed pattern for entry_id
++ [7plus] Add support for 7plus.com.au (#15043)
+* [animeondemand] Relax login error regular expression
++ [shahid] Add support for show pages (#7401)
++ [youtube] Extract uploader, uploader_id and uploader_url for playlists
+ (#11427, #15018)
+* [afreecatv] Improve format extraction (#15019)
++ [cspan] Add support for audio only pages and catch page errors (#14995)
++ [mailru] Add support for embed URLs (#14904)
+* [crunchyroll] Future-proof XML element checks (#15013)
+* [cbslocal] Fix timestamp extraction (#14999, #15000)
+* [discoverygo] Correct TTML subtitle extension
+* [vk] Make view count optional (#14979)
+* [disney] Skip Apple FairPlay formats (#14982)
+* [voot] Fix format extraction (#14758)
+
+
+version 2017.12.14
+
+Core
+* [postprocessor/xattr] Clarify NO_SPACE message (#14970)
+* [downloader/http] Return actual download result from real_download (#14971)
+
+Extractors
++ [itv] Extract more subtitles and duration
+* [itv] Improve extraction (#14944)
++ [byutv] Add support for geo restricted videos
+* [byutv] Fix extraction (#14966, #14967)
++ [bbccouk] Fix extraction for 320k HLS streams
++ [toutv] Add support for special video URLs (#14179)
+* [discovery] Fix free videos extraction (#14157, #14954)
+* [tvnow] Fix extraction (#7831)
++ [nickelodeon:br] Add support for nickelodeon Brazil websites (#14893)
+* [nick] Improve extraction (#14876)
+* [tbs] Fix extraction (#13658)
+
+
+version 2017.12.10
+
+Core
++ [utils] Add sami mimetype to mimetype2ext
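+
+ A hedged illustration of the mimetype2ext idea (the exact mime key used
+ for SAMI in the real table is not reproduced here; 'x-sami' is
+ illustrative):
+
+     def mimetype2ext(mt):
+         return {
+             'x-sami': 'sami',
+             'x-mpegurl': 'm3u8',
+         }.get((mt or '').split('/')[-1].lower())
+
+     assert mimetype2ext('text/x-sami') == 'sami'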
+
+Extractors
+* [culturebox] Improve video id extraction (#14947)
+* [twitter] Improve extraction (#14197)
++ [udemy] Extract more HLS formats
+* [udemy] Improve course id extraction (#14938)
++ [stretchinternet] Add support for portal.stretchinternet.com (#14576)
+* [ellentube] Fix extraction (#14407, #14570)
++ [raiplay:playlist] Add support for playlists (#14563)
+* [sonyliv] Bypass geo restriction
+* [sonyliv] Extract higher quality formats (#14922)
+* [fox] Extract subtitles
++ [fox] Add support for Adobe Pass authentication (#14205, #14489)
+- [dailymotion:cloud] Remove extractor (#6794)
+* [xhamster] Fix thumbnail extraction (#14780)
++ [xhamster] Add support for mobile URLs (#14780)
+* [generic] Don't pass video id as mpd id while extracting DASH (#14902)
+* [ard] Skip invalid stream URLs (#14906)
+* [porncom] Fix metadata extraction (#14911)
+* [pluralsight] Detect agreement request (#14913)
+* [toutv] Fix login (#14614)
+
+
+version 2017.12.02
+
+Core
++ [downloader/fragment] Commit part file after each fragment
++ [extractor/common] Add durations for DASH fragments with bare SegmentURLs
++ [extractor/common] Add support for DASH manifests with SegmentLists with
+ bare SegmentURLs (#14844)
++ [utils] Add hvc1 codec code to parse_codecs
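+
+ A hedged sketch of parse_codecs-style classification (the fourcc prefix
+ lists are abbreviated; hvc1 now counts as video):
+
+     def parse_codecs(codecs_str):
+         vcodec = acodec = None
+         for c in (codecs_str or '').split(','):
+             c = c.strip()
+             fourcc = c.split('.')[0]
+             if fourcc in ('avc1', 'avc3', 'hev1', 'hvc1', 'vp9'):
+                 vcodec = vcodec or c
+             elif fourcc in ('mp4a', 'opus', 'vorbis', 'ac-3'):
+                 acodec = acodec or c
+         return {'vcodec': vcodec or 'none', 'acodec': acodec or 'none'}
+
+     print(parse_codecs('hvc1.1.6.L93.B0,mp4a.40.2'))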
+
+Extractors
+* [xhamster] Fix extraction (#14884)
+* [youku] Update ccode (#14872)
+* [mnet] Fix format extraction (#14883)
++ [xiami] Add Referer header to API request
+* [mtv] Correct scc extension in extracted subtitles (#13730)
+* [vvvvid] Fix extraction for kenc videos (#13406)
++ [br] Add support for BR Mediathek videos (#14560, #14788)
++ [daisuki] Add support for motto.daisuki.com (#14681)
+* [odnoklassniki] Fix API metadata request (#14862)
+* [itv] Fix HLS formats extraction
++ [pbs] Add another media id regular expression
+
+
+version 2017.11.26
+
+Core
+* [extractor/common] Use final URL when dumping request (#14769)
+
+Extractors
+* [fczenit] Fix extraction
+- [firstpost] Remove extractor
+* [freespeech] Fix extraction
+* [nexx] Extract more formats
++ [openload] Add support for openload.link (#14763)
+* [empflix] Relax URL regular expression
+* [empflix] Fix extraction
+* [tnaflix] Don't modify download URLs (#14811)
+- [gamersyde] Remove extractor
+* [francetv:generationwhat] Fix extraction
++ [massengeschmacktv] Add support for Massengeschmack TV
+* [fox9] Fix extraction
+* [faz] Fix extraction and add support for Perform Group embeds (#14714)
++ [performgroup] Add support for performgroup.com
++ [jwplatform] Add support for iframes (#14828)
+* [culturebox] Fix extraction (#14827)
+* [youku] Fix extraction; update ccode (#14815)
+* [livestream] Make SMIL extraction non fatal (#14792)
++ [drtuber] Add support for mobile URLs (#14772)
++ [spankbang] Add support for mobile URLs (#14771)
+* [instagram] Fix description, timestamp and counters extraction (#14755)
+
+
+version 2017.11.15
+
+Core
+* [common] Skip Apple FairPlay m3u8 manifests (#14741)
+* [YoutubeDL] Fix playlist range optimization for --playlist-items (#14740)
+
+Extractors
+* [vshare] Capture and output error message
+* [vshare] Fix extraction (#14473)
+* [crunchyroll] Extract old RTMP formats
+* [tva] Fix extraction (#14736)
+* [gamespot] Lower preference of HTTP formats (#14652)
+* [instagram:user] Fix extraction (#14699)
+* [ccma] Fix typo (#14730)
+- Remove sensitive data from logging in messages
++ [gamespot] Add support for article URLs (#14652)
+* [gamespot] Skip Brightcove Once HTTP formats (#14652)
+* [cartoonnetwork] Update tokenizer_src (#14666)
++ [wsj] Recognize another URL pattern (#14704)
+* [pandatv] Update API URL and sign format URLs (#14693)
+* [crunchyroll] Use old login method (#11572)
+
+
+version 2017.11.06
+
+Core
++ [extractor/common] Add protocol for f4m formats
+* [f4m] Prefer baseURL for relative URLs (#14660)
+* [extractor/common] Respect URL query in _extract_wowza_formats (#14645)
+
+Extractors
++ [hotstar:playlist] Add support for playlists (#12465)
+* [hotstar] Bypass geo restriction (#14672)
+- [22tracks] Remove extractor (#11024, #14628)
++ [skysport] Add support for ooyala videos protected with embed_token
+ (#14641)
+* [gamespot] Extract formats referenced with new data fields (#14652)
+* [spankbang] Detect unavailable videos (#14644)
+
+
+version 2017.10.29
+
+Core
+* [extractor/common] Prefix format id for audio only HLS formats
++ [utils] Add support for zero years and months in parse_duration
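+
+ A hedged mini-sketch of ISO 8601 duration parsing that tolerates zero
+ year/month components (the real parse_duration accepts many more
+ shapes):
+
+     import re
+
+     def parse_duration(s):
+         m = re.match(
+             r'P(?:(\d+)Y)?(?:(\d+)M)?(?:(\d+)D)?'
+             r'(?:T(?:(\d+)H)?(?:(\d+)M)?'
+             r'(?:(\d+(?:\.\d+)?)S)?)?$', s or '')
+         if not m:
+             return None
+         y, mo, d, h, mi, sec = (float(x or 0) for x in m.groups())
+         return ((y * 365 + mo * 30 + d) * 24 + h) * 3600 + mi * 60 + sec
+
+     assert parse_duration('P0Y0M1DT2H') == 93600.0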
+
+Extractors
+* [egghead] Fix extraction (#14388)
++ [fxnetworks] Extract series metadata (#14603)
++ [younow] Add support for younow.com (#9255, #9432, #12436)
+* [dctptv] Fix extraction (#14599)
+* [youtube] Restrict embed regular expression (#14600)
+* [vimeo] Restrict iframe embed regular expression (#14600)
+* [soundgasm] Improve extraction (#14588)
+- [myvideo] Remove extractor (#8557)
++ [nbc] Add support for classic-tv videos (#14575)
++ [vrtnu] Add support for cookies authentication and simplify (#11873)
++ [canvas] Add support for vrt.be/vrtnu (#11873)
+* [twitch:clips] Fix title extraction (#14566)
++ [ndtv] Add support for sub-sites (#14534)
+* [dramafever] Fix login error message extraction
++ [nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt,
+ ro, hu) (#14553)
+
+
+version 2017.10.20
+
+Core
+* [downloader/fragment] Report warning instead of error on inconsistent
+ download state
+* [downloader/hls] Fix total fragments count when ad fragments exist
+
+Extractors
+* [parliamentliveuk] Fix extraction (#14524)
+* [soundcloud] Update client id (#14546)
++ [servus] Add support for servus.com (#14362)
++ [unity] Add support for unity3d.com (#14528)
+* [youtube] Replace youtube redirect URLs in description (#14517)
+* [pbs] Restrict direct video URL regular expression (#14519)
+* [drtv] Respect preference for direct HTTP formats (#14509)
++ [eporner] Add support for embed URLs (#14507)
+* [arte] Capture and output error message
+* [niconico] Improve uploader metadata extraction robustness (#14135)
+
+
+version 2017.10.15.1
+
+Core
+* [downloader/hls] Ignore anvato ad fragments (#14496)
+* [downloader/fragment] Output ad fragment count
+
+Extractors
+* [scrippsnetworks:watch] Bypass geo restriction
++ [anvato] Add ability to bypass geo restriction
+* [redditr] Fix extraction for URLs with query (#14495)
+
+
+version 2017.10.15
+
+Core
++ [common] Add support for jwplayer youtube embeds
+
+Extractors
+* [scrippsnetworks:watch] Fix extraction (#14389)
+* [anvato] Process master m3u8 manifests
+* [youtube] Fix relative URLs in description
+* [spike] Bypass geo restriction
++ [howstuffworks] Add support for more domains
+* [infoq] Fix http format downloading
++ [rtlnl] Add support for another type of embeds
++ [onionstudios] Add support for bulbs-video embeds
+* [udn] Fix extraction
+* [shahid] Fix extraction (#14448)
+* [kaltura] Ignore Widevine encrypted video (.wvm) (#14471)
+* [vh1] Fix extraction (#9613)
+
+
+version 2017.10.12
+
+Core
+* [YoutubeDL] Improve _default_format_spec (#14461)
+
+Extractors
+* [steam] Fix extraction (#14067)
++ [funk] Add support for funk.net (#14464)
++ [nexx] Add support for shortcuts and relax domain id extraction
++ [voxmedia] Add support for recode.net (#14173)
++ [once] Add support for vmap URLs
++ [generic] Add support for channel9 embeds (#14469)
+* [tva] Fix extraction (#14328)
++ [tubitv] Add support for new URL format (#14460)
+- [afreecatv:global] Remove extractor
+- [youtube:shared] Remove extractor (#14420)
++ [slideslive] Add support for slideslive.com (#2680)
++ [facebook] Support thumbnails (#14416)
+* [vvvvid] Fix episode number extraction (#14456)
+* [hrti:playlist] Relax URL regular expression
+* [wdr] Relax media link regular expression (#14447)
+* [hrti] Relax URL regular expression (#14443)
+* [fox] Delegate extraction to uplynk:preplay (#14147)
++ [youtube] Add support for hooktube.com (#14437)
+
+
+version 2017.10.07
+
+Core
+* [YoutubeDL] Ignore duplicates in --playlist-items
+* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and
+ reduce code duplication (#14425)
++ [utils] Use cache in OnDemandPagedList by default
+* [postprocessor/ffmpeg] Convert to opus using libopus (#14381)
+
+Extractors
+* [reddit] Sort formats (#14430)
+* [lnkgo] Relax URL regular expression (#14423)
+* [pornflip] Extend URL regular expression (#14405, #14406)
++ [xtube] Add support for embed URLs (#14417)
++ [xvideos] Add support for embed URLs and improve extraction (#14409)
+* [beeg] Fix extraction (#14403)
+* [tvn24] Relax URL regular expression (#14395)
+* [nbc] Fix extraction (#13651, #13715, #14137, #14198, #14312, #14314, #14378,
+ #14392, #14414, #14419, #14431)
++ [ketnet] Add support for videos without direct sources (#14377)
+* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een
++ [afreecatv] Add support for adult videos (#14376)
+
+
+version 2017.10.01
+
+Core
+* [YoutubeDL] Document youtube_include_dash_manifest
+
+Extractors
++ [tvp] Add support for new URL schema (#14368)
++ [generic] Add support for single format Video.js embeds (#14371)
+* [yahoo] Bypass geo restriction for brightcove (#14210)
+* [yahoo] Use extracted brightcove account id (#14210)
+* [rtve:alacarta] Fix extraction (#14290)
++ [yahoo] Add support for custom brightcove embeds (#14210)
++ [generic] Add support for Video.js embeds
++ [gfycat] Add support for /gifs/detail URLs (#14322)
+* [generic] Fix infinite recursion for twitter:player URLs (#14339)
+* [xhamsterembed] Fix extraction (#14308)
+
+
+version 2017.09.24
+
+Core
++ [options] Accept lrc as a subtitle conversion target format (#14292; see
+ the example below)
+* [utils] Fix handling raw TTML subtitles (#14191)
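+
+ Usage example for the new conversion target (the URL is a placeholder):
+
+     hypervideo --write-sub --convert-subs lrc 'https://example.com/v'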
+
+Extractors
+* [24video] Fix timestamp extraction and make non fatal (#14295)
++ [24video] Add support for 24video.adult (#14295)
++ [kakao] Add support for tv.kakao.com (#12298, #14007)
++ [twitter] Add support for URLs without user id (#14270)
++ [americastestkitchen] Add support for americastestkitchen.com (#10764,
+ #13996)
+* [generic] Fix support for multiple HTML5 videos on one page (#14080)
+* [mixcloud] Fix extraction (#14088, #14132)
++ [lynda] Add support for educourse.ga (#14286)
+* [beeg] Fix extraction (#14275)
+* [nbcsports:vplayer] Correct theplatform URL (#13873)
+* [twitter] Fix duration extraction (#14141)
+* [tvplay] Bypass geo restriction
++ [heise] Add support for YouTube embeds (#14109)
++ [popcorntv] Add support for popcorntv.it (#5914, #14211)
+* [viki] Update app data (#14181)
+* [morningstar] Relax URL regular expression (#14222)
+* [openload] Fix extraction (#14225, #14257)
+* [noovo] Fix extraction (#14214)
+* [dailymotion:playlist] Relax URL regular expression (#14219)
++ [twitch] Add support for go.twitch.tv URLs (#14215)
+* [vgtv] Relax URL regular expression (#14223)
+
+
+version 2017.09.15
+
+Core
+* [downloader/fragment] Restart inconsistent incomplete fragment downloads
+ (#13731)
+* [YoutubeDL] Download raw subtitles files (#12909, #14191)
+
+Extractors
+* [condenast] Fix extraction (#14196, #14207)
++ [orf] Add support for f4m stories
+* [tv4] Relax URL regular expression (#14206)
+* [animeondemand] Bypass geo restriction
++ [animeondemand] Add support for flash videos (#9944)
+
+
+version 2017.09.11
+
+Extractors
+* [rutube:playlist] Fix suitable (#14166)
+
+
+version 2017.09.10
+
+Core
++ [utils] Introduce bool_or_none (see the sketch below)
+* [YoutubeDL] Ensure dir existence for each requested format (#14116)
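+
+ A sketch matching the simple bool_or_none contract: pass real booleans
+ through and map everything else to a default.
+
+     def bool_or_none(v, default=None):
+         return v if isinstance(v, bool) else default
+
+     assert bool_or_none(True) is True
+     assert bool_or_none('true') is None  # strings are not coerced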
+
+Extractors
+* [fox] Fix extraction (#14147)
+* [rutube] Use bool_or_none
+* [rutube] Rework and generalize playlist extractors (#13565)
++ [rutube:playlist] Add support for playlists (#13534, #13565)
++ [radiocanada] Add fallback for title extraction (#14145)
+* [vk] Use dedicated YouTube embeds extraction routine
+* [vice] Use dedicated YouTube embeds extraction routine
+* [cracked] Use dedicated YouTube embeds extraction routine
+* [chilloutzone] Use dedicated YouTube embeds extraction routine
+* [abcnews] Use dedicated YouTube embeds extraction routine
+* [youtube] Separate methods for embeds extraction
+* [redtube] Fix formats extraction (#14122)
+* [arte] Relax unavailability check (#14112)
++ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059)
+* [vidme:user] Relax URL regular expression (#14054)
+* [bpb] Fix extraction (#14043, #14086)
+* [soundcloud] Fix download URL with private tracks (#14093)
+* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707)
+* [viidea] Capture and output lecture error message (#14099)
+* [radiocanada] Skip unsupported platforms (#14100)
+
+
+version 2017.09.02
+
+Extractors
+* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076,
+ #14077, #14079, #14082, #14083, #14094, #14095, #14096)
+* [youtube] Fix upload date extraction (#14065)
++ [charlierose] Add support for episodes (#14062)
++ [bbccouk] Add support for w-prefixed ids (#14056)
+* [googledrive] Extend URL regular expression (#9785)
++ [googledrive] Add support for source format (#14046)
+* [pornhd] Fix extraction (#14005)
+
+
+version 2017.08.27.1
+
+Extractors
+* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037)
+
+
+version 2017.08.27
+
+Core
++ [extractor/common] Extract height and format id for HTML5 videos (#14034)
+* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023,
+ #8625, #9483)
+ * Simplify code and split into separate routines to facilitate maintaining
+ * Make retry mechanism work on errors during actual download not only
+ during connection establishment phase
+ * Retry on ECONNRESET and ETIMEDOUT during reading data from network
+ * Retry on content too short
+ * Show error description on retry
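+
+ A hedged sketch of the retry-loop idea (helper names are illustrative;
+ the real downloader also retries on short content and reports the error
+ description):
+
+     import socket
+
+     def download_with_retries(establish, read_all, retries=3):
+         for attempt in range(retries + 1):
+             try:
+                 conn = establish()
+                 return read_all(conn)  # errors here are now retried too
+             except (socket.timeout, ConnectionResetError) as err:
+                 if attempt == retries:
+                     raise
+                 print('got %r, retrying (%d/%d)'
+                       % (err, attempt + 1, retries))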
+
+Extractors
+* [generic] Lower preference for extraction from LD-JSON
+* [rai] Fix audio formats extraction (#14024)
+* [youtube] Fix controversy videos extraction (#14027, #14029)
+* [mixcloud] Fix extraction (#14015, #14020)
+
+
+version 2017.08.23
+
+Core
++ [extractor/common] Introduce _parse_xml
+* [extractor/common] Make HLS and DASH extraction non fatal in
+ _parse_html5_media_entries (#13970)
+* [utils] Fix unescapeHTML for malformed string like "&a&quot;" (#13935)
+
+Extractors
+* [cbc:watch] Bypass geo restriction (#13993)
+* [toutv] Relax DRM check (#13994)
++ [googledrive] Add support for subtitles (#13619, #13638)
+* [pornhub] Relax uploader regular expression (#13906, #13975)
+* [bandcamp:album] Extract track titles (#13962)
++ [bbccouk] Add support for events URLs (#13893)
++ [liveleak] Support multi-video pages (#6542)
++ [liveleak] Support another liveleak embedding pattern (#13336)
+* [cda] Fix extraction (#13935)
++ [laola1tv] Add support for tv.ittf.com (#13965)
+* [mixcloud] Fix extraction (#13958, #13974, #13980, #14003)
+
+
+version 2017.08.18
+
+Core
+* [YoutubeDL] Sanitize byte string format URLs (#13951)
++ [extractor/common] Add support for float durations in _parse_mpd_formats
+ (#13919)
+
+Extractors
+* [arte] Detect unavailable videos (#13945)
+* [generic] Convert redirect URLs to unicode strings (#13951)
+* [udemy] Fix paid course detection (#13943)
+* [pluralsight] Use RPC API for course extraction (#13937)
++ [clippit] Add support for clippituser.tv
++ [qqmusic] Support new URL schemes (#13805)
+* [periscope] Renew HLS extraction (#13917)
+* [mixcloud] Extract decrypt key
+
+
+version 2017.08.13
+
+Core
+* [YoutubeDL] Make sure format id is not empty
+* [extractor/common] Make _family_friendly_search optional
+* [extractor/common] Respect source's type attribute for HTML5 media (#13892)
+
+Extractors
+* [pornhub:playlistbase] Skip videos from drop-down menu (#12819, #13902)
++ [fourtube] Add support for pornerbros.com (#6022)
++ [fourtube] Add support for porntube.com (#7859, #13901)
++ [fourtube] Add support for fux.com
+* [limelight] Improve embeds detection (#13895)
++ [reddit] Add support for v.redd.it and reddit.com (#13847)
+* [aparat] Extract all formats (#13887)
+* [mixcloud] Fix play info decryption (#13885)
++ [generic] Add support for vzaar embeds (#13876)
+
+
+version 2017.08.09
+
+Core
+* [utils] Skip missing params in cli_bool_option (#13865)
+
+Extractors
+* [xxxymovies] Fix title extraction (#13868)
++ [nick] Add support for nick.com.pl (#13860)
+* [mixcloud] Fix play info decryption (#13867)
+* [20min] Fix embeds extraction (#13852)
+* [dplayit] Fix extraction (#13851)
++ [niconico] Support videos with multiple formats (#13522)
++ [niconico] Support HTML5-only videos (#13806)
+
+
+version 2017.08.06
+
+Core
+* Use relative paths for DASH fragments (#12990)
+
+Extractors
+* [pluralsight] Fix format selection
+- [mpora] Remove extractor (#13826)
++ [voot] Add support for voot.com (#10255, #11644, #11814, #12350, #13218)
+* [vlive:channel] Limit number of videos per page to 100 (#13830)
+* [podomatic] Extend URL regular expression (#13827)
+* [cinchcast] Extend URL regular expression
+* [yandexdisk] Relax URL regular expression (#13824)
+* [vidme] Extract DASH and HLS formats
+- [teamfour] Remove extractor (#13782)
+* [pornhd] Fix extraction (#13783)
+* [udemy] Fix subtitles extraction (#13812)
+* [mlb] Extend URL regular expression (#13740, #13773)
++ [pbs] Add support for new URL schema (#13801)
+* [nrktv] Update API host (#13796)
+
+
+version 2017.07.30.1
+
+Core
+* [downloader/hls] Use redirect URL as manifest base (#13755)
+* [options] Correctly hide login info from debug outputs (#13696)
+
+Extractors
++ [watchbox] Add support for watchbox.de (#13739)
+- [clipfish] Remove extractor
++ [youjizz] Fix extraction (#13744)
++ [generic] Add support for another ooyala embed pattern (#13727)
++ [ard] Add support for lives (#13771)
+* [soundcloud] Update client id
++ [soundcloud:trackstation] Add support for track stations (#13733)
+* [svtplay] Use geo verification proxy for API request
+* [svtplay] Update API URL (#13767)
++ [yandexdisk] Add support for yadi.sk (#13755)
++ [megaphone] Add support for megaphone.fm
+* [amcnetworks] Make rating optional (#12453)
+* [cloudy] Fix extraction (#13737)
++ [nickru] Add support for nickelodeon.ru
+* [mtv] Improve thumbnail extraction
+* [nick] Automate geo-restriction bypass (#13711)
+* [niconico] Improve error reporting (#13696)
+
+
+version 2017.07.23
+
+Core
+* [YoutubeDL] Improve default format specification (#13704)
+* [YoutubeDL] Do not override id, extractor and extractor_key for
+ url_transparent entities
+* [extractor/common] Fix playlist_from_matches
+
+Extractors
+* [itv] Fix production id extraction (#13671, #13703)
+* [vidio] Make duration non fatal and fix typo
+* [mtv] Skip missing video parts (#13690)
+* [sportbox:embed] Fix extraction
++ [npo] Add support for npo3.nl URLs (#13695)
+* [dramafever] Remove video id from title (#13699)
++ [egghead:lesson] Add support for lessons (#6635)
+* [funnyordie] Extract more metadata (#13677)
+* [youku:show] Fix playlist extraction (#13248)
++ [dispeak] Recognize sevt subdomain (#13276)
+* [adn] Improve error reporting (#13663)
+* [crunchyroll] Relax series and season regular expression (#13659)
++ [spiegel:article] Add support for nexx iframe embeds (#13029)
++ [nexx:embed] Add support for iframe embeds
+* [nexx] Improve JS embed extraction
++ [pearvideo] Add support for pearvideo.com (#13031)
+
+
+version 2017.07.15
+
+Core
+* [YoutubeDL] Don't expand environment variables in meta fields (#13637)
+
+Extractors
+* [spiegeltv] Delegate extraction to nexx extractor (#13159)
++ [nexx] Add support for nexx.cloud (#10807, #13465)
+* [generic] Fix rutube embeds extraction (#13641)
+* [karrierevideos] Fix title extraction (#13641)
+* [youtube] Don't capture YouTube Red ad for creator meta field (#13621)
+* [slideshare] Fix extraction (#13617)
++ [5tv] Add another video URL pattern (#13354, #13606)
+* [drtv] Make HLS and HDS extraction non fatal
+* [ted] Fix subtitles extraction (#13628, #13629)
+* [vine] Make sure the title won't be empty
++ [twitter] Support HLS streams in vmap URLs
++ [periscope] Support pscp.tv URLs in embedded frames
+* [twitter] Extract mp4 urls via mobile API (#12726)
+* [niconico] Fix authentication error handling (#12486)
+* [giantbomb] Extract m3u8 formats (#13626)
++ [vlive:playlist] Add support for playlists (#13613)
+
+
+version 2017.07.09
+
+Core
++ [extractor/common] Add support for AMP tags in _parse_html5_media_entries
++ [utils] Support attributes with no values in get_elements_by_attribute
+
+Extractors
++ [dailymail] Add support for embeds
++ [joj] Add support for joj.sk (#13268)
+* [abc.net.au:iview] Extract more formats (#13492, #13489)
+* [egghead:course] Fix extraction (#6635, #13370)
++ [cjsw] Add support for cjsw.com (#13525)
++ [eagleplatform] Add support for referrer protected videos (#13557)
++ [eagleplatform] Add support for another embed pattern (#13557)
+* [veoh] Extend URL regular expression (#13601)
+* [npo:live] Fix live stream id extraction (#13568, #13605)
+* [googledrive] Fix height extraction (#13603)
++ [dailymotion] Add support for new layout (#13580)
+- [yam] Remove extractor
+* [xhamster] Extract all formats and fix duration extraction (#13593)
++ [xhamster] Add support for new URL schema (#13593)
+* [espn] Extend URL regular expression (#13244, #13549)
+* [kaltura] Fix typo in subtitles extraction (#13569)
+* [vier] Adapt extraction to redesign (#13575)
+
+
+version 2017.07.02
+
+Core
+* [extractor/common] Improve _json_ld
+
+Extractors
++ [thisoldhouse] Add more fallbacks for video id
+* [thisoldhouse] Fix video id extraction (#13540, #13541)
+* [xfileshare] Extend format regular expression (#13536)
+* [ted] Fix extraction (#13535)
++ [tastytrade] Add support for tastytrade.com (#13521)
+* [dplayit] Relax video id regular expression (#13524)
++ [generic] Extract more generic metadata (#13527)
++ [bbccouk] Capture and output error message (#13501, #13518)
+* [cbsnews] Relax video info regular expression (#13284, #13503)
++ [facebook] Add support for plugin video embeds and multiple embeds (#13493)
+* [soundcloud] Switch to https for API requests (#13502)
+* [pandatv] Switch to https for API and download URLs
++ [pandatv] Add support for https URLs (#13491)
++ [niconico] Support sp subdomain (#13494)
+
+
+version 2017.06.25
+
+Core
++ [adobepass] Add support for DIRECTV NOW (mso ATTOTT) (#13472)
+* [YoutubeDL] Skip malformed formats for better extraction robustness
+
+Extractors
++ [wsj] Add support for barrons.com (#13470)
++ [ign] Add another video id pattern (#13328)
++ [raiplay:live] Add support for live streams (#13414)
++ [redbulltv] Add support for live videos and segments (#13486)
++ [onetpl] Add support for videos embedded via pulsembed (#13482)
+* [ooyala] Make more robust
+* [ooyala] Skip empty format URLs (#13471, #13476)
+* [hgtv.com:show] Fix typo
+
+
+version 2017.06.23
+
+Core
+* [adobepass] Fix extraction on older python 2.6
+
+Extractors
+* [youtube] Adapt to new automatic captions rendition (#13467)
+* [hgtv.com:show] Relax video config regular expression (#13279, #13461)
+* [drtuber] Fix formats extraction (#12058)
+* [youporn] Fix upload date extraction
+* [youporn] Improve formats extraction
+* [youporn] Fix title extraction (#13456)
+* [googledrive] Fix formats sorting (#13443)
+* [watchindianporn] Fix extraction (#13411, #13415)
++ [vimeo] Add fallback mp4 extension for original format
++ [ruv] Add support for ruv.is (#13396)
+* [viu] Fix extraction on older python 2.6
+* [pandora.tv] Fix upload_date extraction (#12846)
++ [asiancrush] Add support for asiancrush.com (#13420)
+
+
+version 2017.06.18
+
+Core
+* [downloader/common] Use utils.shell_quote for debug command line
+* [utils] Use compat_shlex_quote in shell_quote
+* [postprocessor/execafterdownload] Encode command line (#13407)
+* [compat] Fix compat_shlex_quote on Windows (#5889, #10254)
+* [postprocessor/metadatafromtitle] Fix missing optional meta fields processing
+ in --metadata-from-title (#13408)
+* [extractor/common] Fix json dumping with --geo-bypass
++ [extractor/common] Improve jwplayer subtitles extraction
++ [extractor/common] Improve jwplayer formats extraction (#13379)
+
+Extractors
+* [polskieradio] Fix extraction (#13392)
++ [xfileshare] Add support for fastvideo.me (#13385)
+* [bilibili] Fix extraction of videos with double quotes in titles (#13387)
+* [4tube] Fix extraction (#13381, #13382)
++ [disney] Add support for disneychannel.de (#13383)
+* [npo] Improve URL regular expression (#13376)
++ [corus] Add support for showcase.ca
++ [corus] Add support for history.ca (#13359)
+
+
+version 2017.06.12
+
+Core
+* [utils] Handle compat_HTMLParseError in extract_attributes (#13349)
++ [compat] Introduce compat_HTMLParseError
+* [utils] Improve unified_timestamp
+* [extractor/generic] Ensure format id is unicode string
+* [extractor/common] Return unicode string from _match_id
++ [YoutubeDL] Sanitize more fields (#13313)
+
+Extractors
++ [xfileshare] Add support for rapidvideo.tv (#13348)
+* [xfileshare] Modernize and pass Referer
++ [rutv] Add support for testplayer.vgtrk.com (#13347)
++ [newgrounds] Extract more metadata (#13232)
++ [newgrounds:playlist] Add support for playlists (#10611)
+* [newgrounds] Improve formats and uploader extraction (#13346)
+* [msn] Fix formats extraction
+* [turbo] Ensure format id is string
+* [sexu] Ensure height is int
+* [jove] Ensure comment count is int
+* [golem] Ensure format id is string
+* [gfycat] Ensure filesize is int
+* [foxgay] Ensure height is int
+* [flickr] Ensure format id is string
+* [sohu] Fix numeric fields
+* [safari] Improve authentication detection (#13319)
+* [liveleak] Ensure height is int (#13313)
+* [streamango] Make title optional (#13292)
+* [rtlnl] Improve URL regular expression (#13295)
+* [tvplayer] Fix extraction (#13291)
+
+
+version 2017.06.05
+
+Core
+* [YoutubeDL] Don't emit ANSI escape codes on Windows (#13270)
+
+Extractors
++ [bandcamp:weekly] Add support for bandcamp weekly (#12758)
+* [pornhub:playlist] Fix extraction (#13281)
+- [godtv] Remove extractor (#13175)
+* [safari] Fix typo (#13252)
+* [youtube] Improve chapters extraction (#13247)
+* [1tv] Lower preference for HTTP formats (#13246)
+* [francetv] Relax URL regular expression
+* [drbonanza] Fix extraction (#13231)
+* [packtpub] Fix authentication (#13240)
+
+
+version 2017.05.29
+
+Extractors
+* [youtube] Fix DASH MPD extraction for videos with non-encrypted format URLs
+ (#13211)
+* [xhamster] Fix uploader and like/dislike count extraction (#13216)
++ [xhamster] Extract categories (#11728)
++ [abcnews] Add support for embed URLs (#12851)
+* [gaskrank] Fix extraction (#12493)
+* [medialaan] Fix videos with missing videoUrl (#12774)
+* [dvtv] Fix playlist support
++ [dvtv] Add support for DASH and HLS formats (#3063)
++ [beam:vod] Add support for beam.pro/mixer.com VODs (#13032)
+* [cbsinteractive] Relax URL regular expression (#13213)
+* [adn] Fix formats extraction
++ [youku] Extract more metadata (#10433)
+* [cbsnews] Fix extraction (#13205)
+
+
+version 2017.05.26
+
+Core
++ [utils] strip_jsonp() can recognize more patterns (see the sketch below)
+* [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)
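+
+ A hedged mini-sketch of the strip_jsonp idea, peeling the callback
+ wrapper off a JSONP payload (the real regex accepts more patterns than
+ shown here):
+
+     import re
+
+     def strip_jsonp(code):
+         return re.sub(
+             r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(?P<json>.*)\)\s*;?\s*$',
+             r'\g<json>', code)
+
+     assert strip_jsonp('cb({"a": 1});') == '{"a": 1}'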
+
+Extractors
++ [youtube] DASH MPDs with cipher signatures are recognized now (#11381)
++ [bbc] Add support for authentication
+* [tudou] Merge into youku extractor (#12214)
+* [youku:show] Fix extraction
+* [youku] Fix extraction (#13191)
+* [udemy] Fix extraction for outputs' format entries without URL (#13192)
+* [vimeo] Fix formats' sorting (#13189)
+* [cbsnews] Fix extraction for 60 Minutes videos (#12861)
+
+
+version 2017.05.23
+
+Core
++ [downloader/external] Pass -loglevel to ffmpeg downloader (#13183)
++ [adobepass] Add support for Bright House Networks (#13149)
+
+Extractors
++ [streamcz] Add support for subtitles (#13174)
+* [youtube] Fix DASH manifest signature decryption (#8944, #13156)
+* [toggle] Relax URL regular expression (#13172)
+* [toypics] Fix extraction (#13077)
+* [njpwworld] Fix extraction (#13162, #13169)
++ [hitbox] Add support for smashcast.tv (#13154)
+* [mitele] Update app key regular expression (#13158)
+
+
+version 2017.05.18.1
+
+Core
+* [jsinterp] Fix typo and clean up regular expressions (#13134)
+
+
+version 2017.05.18
+
+Core
++ [jsinterp] Add support for quoted names and indexers (#13123, #13124, #13125,
+ #13126, #13128, #13129, #13130, #13131, #13132)
++ [extractor/common] Add support for schemeless URLs in _extract_wowza_formats
+ (#13088, #13092)
++ [utils] Recognize more audio codecs (#13081)
+
+Extractors
++ [vier] Extract more metadata (#12539)
+* [vier] Improve extraction (#12801)
+ + Add support for authentication
+ * Bypass authentication when no credentials provided
+ * Improve extraction robustness
+* [dailymail] Fix sources extraction (#13057)
+* [dailymotion] Extend URL regular expression (#13079)
+
+
+version 2017.05.14
+
+Core
++ [extractor/common] Respect Width and Height attributes in ISM manifests
++ [postprocessor/metadatafromtitle] Add support for regular expression
+ syntax in --metadata-from-title (#13065)
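+
+ Usage example of the regex syntax with named groups (the URL is a
+ placeholder):
+
+     hypervideo --metadata-from-title '(?P<artist>.+?) - (?P<title>.+)' \
+         'https://example.com/v'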
+
+Extractors
++ [mediaset] Add support for video.mediaset.it (#12708, #12964)
+* [orf:radio] Fix extraction (#11643, #12926)
+* [aljazeera] Extend URL regular expression (#13053)
+* [imdb] Relax URL regular expression (#13056)
++ [francetv] Add support for mobile.france.tv (#13068)
++ [upskill] Add support for upskillcourses.com (#13043)
+* [thescene] Fix extraction (#13061)
+* [condenast] Improve embed support
+* [liveleak] Fix extraction (#12053)
++ [douyu] Support Douyu shows (#12228)
+* [myspace] Improve URL regular expression (#13040)
+* [adultswim] Use desktop platform in assets URL (#13041)
+
+
+version 2017.05.09
+
+Core
+* [YoutubeDL] Force --restrict-filenames on all python versions when no
+ locale is set (#13027)
+
+Extractors
+* [francetv] Adapt to site redesign (#13034)
++ [packtpub] Add support for authentication (#12622)
+* [drtv] Lower preference for SignLanguage formats (#13013, #13016)
++ [cspan] Add support for brightcove live embeds (#13028)
+* [vrv] Extract DASH formats and subtitles
+* [funimation] Fix authentication (#13021)
+* [adultswim] Fix extraction (#8640, #10950, #11042, #12121)
+ + Add support for Adobe Pass authentication
+ + Add support for live streams
+ + Add support for show pages
+* [turner] Extract thumbnail, is_live and strip description
++ [nonktube] Add support for nonktube.com (#8647, #13024)
++ [nuevo] Pass headers to _extract_nuevo
+* [nbc] Improve extraction (#12364)
+
+
+version 2017.05.07
+
+Core
+* [extractor/common] Fix typo in _extract_akamai_formats
++ [postprocessor/ffmpeg] Embed chapters into media file with --add-metadata
++ [extractor/common] Introduce chapters meta field
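+
+ A hedged illustration of the new chapters meta field on an info dict
+ (values are illustrative; times are in seconds):
+
+     info = {
+         'id': 'xyz',
+         'title': 'Talk',
+         'chapters': [
+             {'start_time': 0, 'end_time': 60, 'title': 'Intro'},
+             {'start_time': 60, 'end_time': 300, 'title': 'Main part'},
+         ],
+     }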
+
+Extractors
+* [youtube] Fix authentication (#12820, #12927, #12973, #12992, #12993, #12995,
+ #13003)
+* [bilibili] Fix video downloading (#13001)
+* [rmcdecouverte] Fix extraction (#12937)
+* [theplatform] Extract chapters
+* [bandcamp] Fix thumbnail extraction (#12980)
+* [pornhub] Extend URL regular expression (#12996)
++ [youtube] Extract chapters
++ [nrk] Extract chapters
++ [vice] Add support for ooyala embeds in article pages
++ [vice] Support vice articles (#12968)
+* [vice] Fix extraction for non en_us videos (#12967)
+* [gdcvault] Fix extraction for some videos (#12733)
+* [pbs] Improve multipart video support (#12981)
+* [laola1tv] Fix extraction (#12880)
++ [cda] Support birthday verification (#12789)
+* [leeco] Fix extraction (#12974)
++ [pbs] Extract chapters
+* [amp] Improve thumbnail and subtitles extraction
+* [foxsports] Fix extraction (#12945)
+- [coub] Remove comment count extraction (#12941)
+
+
+version 2017.05.01
+
+Core
++ [extractor/common] Extract view count from JSON-LD
+* [utils] Improve unified_timestamp
++ [utils] Add video/mp2t to mimetype2ext
+* [downloader/external] Properly handle live stream downloading cancellation
+ (#8932)
++ [utils] Add support for unicode whitespace in clean_html on python 2 (#12906)
+
+Extractors
+* [infoq] Make audio format extraction non fatal (#12938)
+* [brightcove] Allow whitespace around attribute names in embedded code
++ [zaq1] Add support for zaq1.pl (#12693)
++ [xvideos] Extract duration (#12828)
+* [vevo] Fix extraction (#12879)
++ [noovo] Add support for noovo.ca (#12792)
++ [washingtonpost] Add support for embeds (#12699)
+* [yandexmusic:playlist] Fix extraction for python 3 (#12888)
+* [anvato] Improve extraction (#12913)
+ * Promote to regular shortcut based extractor
+ * Add mcp to access key mapping table
+ * Add support for embeds extraction
+ * Add support for anvato embeds in generic extractor
+* [xtube] Fix extraction for older FLV videos (#12734)
+* [tvplayer] Fix extraction (#12908)
+
+
+version 2017.04.28
+
+Core
++ [adobepass] Use geo verification headers for all requests
+- [downloader/fragment] Remove assert for resume_len when no fragments
+ downloaded
++ [extractor/common] Add manifest_url for explicit group rendition formats
+* [extractor/common] Fix manifest_url for m3u8 formats
+- [extractor/common] Don't list master m3u8 playlists in format list (#12832)
+
+Extractor
+* [aenetworks] Fix extraction for shows with single season
++ [go] Add support for Disney, DisneyJunior and DisneyXD show pages
+* [youtube] Recognize new locale-based player URLs (#12885)
++ [streamable] Add support for new embedded URL schema (#12844)
+* [arte:+7] Relax URL regular expression (#12837)
+
+
+version 2017.04.26
+
+Core
+* Introduce --keep-fragments for keeping fragments of fragmented downloads on
+ disk after download is finished
+* [YoutubeDL] Fix output template for missing timestamp (#12796)
+* [socks] Handle cases where credentials are required but missing
+* [extractor/common] Improve HLS extraction (#12211)
+ * Extract m3u8 parsing to separate method
+ * Improve rendition groups extraction
+ * Build stream name according to stream GROUP-ID
+ * Ignore reference to AUDIO group without URI when stream has no CODECS
+ * Use float for scaled tbr in _parse_m3u8_formats
+* [utils] Add support for TTML styles in dfxp2srt
+* [downloader/hls] No need to download keys for fragments that have already
+ been downloaded
+* [downloader/fragment] Improve fragment downloading
+ * Resume immediately
+ * Don't concatenate fragments and decrypt them on every resume
+ * Optimize disk storage usage, don't store intermediate fragments on disk
+ * Store bookkeeping download state file
++ [extractor/common] Add support for multiple getters in try_get
++ [extractor/common] Add support for video of WebPage context in _json_ld
+ (#12778)
++ [extractor/common] Relax JWPlayer regular expression and remove
+ duplicate URLs (#12768)
+
+Extractors
+* [iqiyi] Fix extraction of Yule videos
+* [vidio] Improve extraction and sort formats
++ [brightcove] Match only video elements with data-video-id attribute
+* [iqiyi] Fix playlist detection (#12504)
+- [azubu] Remove extractor (#12813)
+* [porn91] Fix extraction (#12814)
+* [vidzi] Fix extraction (#12793)
++ [amp] Extract error message (#12795)
++ [xfileshare] Add support for gorillavid.com and daclips.com (#12776)
+* [instagram] Fix extraction (#12777)
++ [generic] Support Brightcove videos in <iframe> (#12482)
++ [brightcove] Support URLs with bcpid instead of playerID (#12482)
+* [brightcove] Fix _extract_url (#12782)
++ [odnoklassniki] Extract HLS formats
+
+
+version 2017.04.17
+
+Extractors
+* [limelight] Improve extraction of LimelightEmbeddedPlayerFlash media embeds
+ and add support for channel and channelList embeds
+* [generic] Extract multiple Limelight embeds (#12761)
++ [itv] Extract series metadata
+* [itv] Fix RTMP formats downloading (#12759)
+* [itv] Use native HLS downloader by default
++ [go90] Extract subtitles (#12752)
++ [go90] Extract series metadata (#12752)
+
+
+version 2017.04.16
+
+Core
+* [YoutubeDL] Apply expand_path after output template substitution
++ [YoutubeDL] Propagate overridden meta fields to extraction results of type
+ url (#11163)
+
+Extractors
++ [generic] Extract RSS entries as url_transparent (#11163)
++ [streamango] Add support for streamango.com (#12643)
++ [wsj:article] Add support for articles (#12558)
+* [brightcove] Relax video tag embeds extraction and validate ambiguous embeds'
+ URLs (#9163, #12005, #12178, #12480)
++ [udemy] Add support for react rendition (#12744)
+
+
+version 2017.04.15
+
+Extractors
+* [youku] Fix fileid extraction (#12741, #12743)
+
+
+version 2017.04.14
+
+Core
++ [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955)
++ [adobepass] Improve Comcast and Verizon login code (#10803)
++ [adobepass] Add support for Verizon (#10803)
+
+Extractors
++ [aenetworks] Add support for specials (#12723)
++ [hbo] Extract HLS formats
++ [go90] Add support for go90.com (#10127)
++ [tv2hu] Add support for tv2.hu (#10509)
++ [generic] Exclude URLs with xml ext from valid video URLs (#10768, #11654)
+* [youtube] Improve HLS formats extraction
+* [afreecatv] Fix extraction for videos with different key layout (#12718)
+- [youtube] Remove explicit preference for audio-only and video-only formats in
+ order not to break sorting when new formats appear
+* [canalplus] Bypass geo restriction
+
+
+version 2017.04.11
+
+Extractors
+* [afreecatv] Fix extraction (#12706)
++ [generic] Add support for <object> YouTube embeds (#12637)
+* [bbccouk] Treat bitrate as audio+video bitrate in media selector
++ [bbccouk] Skip unrecognized formats in media selector (#12701)
++ [bbccouk] Add support for https protocol in media selector (#12701)
+* [curiositystream] Fix extraction (#12638)
+* [adn] Update subtitle decryption key
+* [chaturbate] Fix extraction (#12665, #12688, #12690)
+
+
+version 2017.04.09
+
+Extractors
++ [medici] Add support for medici.tv (#3406)
++ [rbmaradio] Add support for redbullradio.com URLs (#12687)
++ [npo:live] Add support for default URL (#12555)
+* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
++ [thesun] Add support for thesun.co.uk (#11298, #12674)
++ [ceskatelevize:porady] Add support for porady (#7411, #12645)
+* [ceskatelevize] Improve extraction and remove URL replacement hacks
++ [kaltura] Add support for iframe embeds (#12679)
+* [airmozilla] Fix extraction (#12670)
+* [wshh] Extract html5 entries and delegate to generic extractor (#12676)
++ [raiplay] Extract subtitles
++ [xfileshare] Add support for vidlo.us (#12660)
++ [xfileshare] Add support for vidbom.com (#12661)
++ [aenetworks] Add more video URL regular expressions (#12657)
++ [odnoklassniki] Fix format sorting for 1080p quality
++ [rtl2] Add support for you.rtl2.de (#10257)
++ [vshare] Add support for vshare.io (#12278)
+
+
+version 2017.04.03
+
+Core
++ [extractor/common] Add censorship check for TransTelekom ISP
+* [extractor/common] Move censorship checks to a separate method
+
+Extractors
++ [discoveryvr] Add support for discoveryvr.com (#12578)
++ [tv5mondeplus] Add support for tv5mondeplus.com (#11386)
++ [periscope] Add support for pscp.tv URLs (#12618, #12625)
+
+
+version 2017.04.02
+
+Core
+* [YoutubeDL] Return early when extraction of url_transparent fails
+
+Extractors
+* [rai] Fix and improve extraction (#11790)
++ [vrv] Add support for series pages
+* [limelight] Improve extraction for audio only formats
+* [funimation] Fix extraction (#10696, #11773)
++ [xfileshare] Add support for vidabc.com (#12589)
++ [xfileshare] Improve extraction and extract hls formats
++ [crunchyroll] Pass geo verification proxy
++ [cwtv] Extract ISM formats
++ [tvplay] Bypass geo restriction
++ [vrv] Add support for vrv.co
++ [packtpub] Add support for packtpub.com (#12610)
++ [generic] Pass base_url to _parse_jwplayer_data
++ [adn] Add support for animedigitalnetwork.fr (#4866)
++ [allocine] Extract more metadata
+* [allocine] Fix extraction (#12592)
+* [openload] Fix extraction
+
+
+version 2017.03.26
+
+Core
+* Don't raise an error if JWPlayer config data is not a JavaScript object
+ literal. _find_jwplayer_data now returns a dict rather than a str. (#12307)
+* Expand environment variables for options representing paths (#12556)
++ [utils] Introduce expand_path
+* [downloader/hls] Delegate downloading to ffmpeg immediately for live streams
+
+Extractors
+* [afreecatv] Fix extraction (#12179)
++ [atvat] Add support for atv.at (#5325)
++ [fox] Add metadata extraction (#12391)
++ [atresplayer] Extract DASH formats
++ [atresplayer] Extract HD manifest (#12548)
+* [atresplayer] Fix login error detection (#12548)
+* [franceculture] Fix extraction (#12547)
+* [youtube] Improve URL regular expression (#12538)
+* [generic] Do not follow redirects to the same URL
+
+
+version 2017.03.24
+
+Extractors
+- [9c9media] Remove mp4 URL extraction request
++ [bellmedia] Add support for etalk.ca and space.ca (#12447)
+* [channel9] Fix extraction (#11323)
+* [cloudy] Fix extraction (#12525)
++ [hbo] Add support for free episode URLs and new formats extraction (#12519)
+* [condenast] Fix extraction and style (#12526)
+* [viu] Relax URL regular expression (#12529)
+
+
+version 2017.03.22
+
+Extractors
+- [pluralsight] Omit module title from video title (#12506)
+* [pornhub] Decode obfuscated video URL (#12470, #12515)
+* [senateisvp] Allow https URL scheme for embeds (#12512)
+
+
+version 2017.03.20
+
+Core
++ [YoutubeDL] Allow multiple input URLs to be used with stdout (-) as
+ output template
++ [adobepass] Detect and output error on authz token extraction (#12472)
+
+Extractors
++ [bostonglobe] Add extractor for bostonglobe.com (#12099)
++ [toongoggles] Add support for toongoggles.com (#12171)
++ [medialaan] Add support for Medialaan sites (#9974, #11912)
++ [discoverynetworks] Add support for more domains and bypass geo restriction
+* [openload] Fix extraction (#10408)
+
+
+version 2017.03.16
+
+Core
++ [postprocessor/ffmpeg] Add support for flac
++ [extractor/common] Extract SMIL formats from jwplayer
+
+Extractors
++ [generic] Add forgotten return for jwplayer formats
+* [redbulltv] Improve extraction
+
+
+version 2017.03.15
+
+Core
+* Fix missing subtitles if --add-metadata is used (#12423)
+
+Extractors
+* [facebook] Make title optional (#12443)
++ [mitele] Add support for ooyala videos (#12430)
+* [openload] Fix extraction (#12435, #12446)
+* [streamable] Update API URL (#12433)
++ [crunchyroll] Extract season name (#12428)
+* [discoverygo] Bypass geo restriction
++ [discoverygo:playlist] Add support for playlists (#12424)
+
+
+version 2017.03.10
+
+Extractors
+* [generic] Make title optional for jwplayer embeds (#12410)
+* [wdr:maus] Fix extraction (#12373)
+* [prosiebensat1] Improve title extraction (#12318, #12327)
+* [dplayit] Separate and rewrite extractor and bypass geo restriction (#12393)
+* [miomio] Fix extraction (#12291, #12388, #12402)
+* [telequebec] Fix description extraction (#12399)
+* [openload] Fix extraction (#12357)
+* [brightcove:legacy] Relax videoPlayer validation check (#12381)
+
+
+version 2017.03.07
+
+Core
+* Metadata are now added after conversion (#5594)
+
+Extractors
+* [soundcloud] Update client id (#12376)
+* [openload] Fix extraction (#10408, #12357)
+
+
+version 2017.03.06
+
+Core
++ [utils] Process bytestrings in urljoin (#12369)
+* [extractor/common] Improve height extraction and extract bitrate
+* [extractor/common] Move jwplayer formats extraction in separate method
++ [external:ffmpeg] Limit test download size to 10KiB (#12362)
+
+Extractors
++ [drtv] Add geo countries to GeoRestrictedError
++ [drtv:live] Bypass geo restriction
++ [tunepk] Add extractor (#12197, #12243)
+
+
+version 2017.03.05
+
+Extractors
++ [twitch] Add basic support for two-factor authentication (#11974)
++ [vier] Add support for vijf.be (#12304)
++ [redbulltv] Add support for redbull.tv (#3919, #11948)
+* [douyutv] Switch to the PC API to bypass the 5-minute limitation (#12316)
++ [generic] Add support for rutube embeds
++ [rutube] Relax URL regular expression
++ [vrak] Add support for vrak.tv (#11452)
++ [brightcove:new] Add ability to smuggle geo_countries into URL
++ [brightcove:new] Raise GeoRestrictedError
+* [go] Relax URL regular expression (#12341)
+* [24video] Use original host for requests (#12339)
+* [ruutu] Disable DASH formats (#12322)
+
+
+version 2017.03.02
+
+Core
++ [adobepass] Add support for Charter Spectrum (#11465)
+* [YoutubeDL] Don't sanitize identifiers in output template (#12317)
+
+Extractors
+* [facebook] Fix extraction (#12323, #12330)
+* [youtube] Mark errors about rental videos as expected (#12324)
++ [npo] Add support for audio
+* [npo] Adapt to app.php API (#12311, #12320)
+
+
+version 2017.02.28
+
+Core
++ [utils] Add bytes_to_long and long_to_bytes
++ [utils] Add pkcs1pad
++ [aes] Add aes_cbc_encrypt
+
+Extractors
++ [azmedien:showplaylist] Add support for show playlists (#12160)
++ [youtube:playlist] Recognize another playlist pattern (#11928, #12286)
++ [daisuki] Add support for daisuki.net (#2486, #3186, #4738, #6175, #7776,
+ #10060)
+* [douyu] Fix extraction (#12301)
+
+
+version 2017.02.27
+
+Core
+* [downloader/common] Limit sleep interval message to 2 digits after the
+ decimal point (#12183)
++ [extractor/common] Add preference to _parse_html5_media_entries
+
+Extractors
++ [npo] Add support for zapp.nl
++ [npo] Add support for hetklokhuis.nl (#12293)
+- [scivee] Remove extractor (#9315)
++ [cda] Decode download URL (#12255)
++ [crunchyroll] Improve uploader extraction (#12267)
++ [youtube] Raise GeoRestrictedError
++ [dailymotion] Raise GeoRestrictedError
++ [mdr] Recognize more URL patterns (#12169)
++ [tvigle] Raise GeoRestrictedError
+* [vevo] Fix extraction for videos with the new streams/streamsV3 format
+ (#11719)
++ [freshlive] Add support for freshlive.tv (#12175)
++ [xhamster] Capture and output videoClosed error (#12263)
++ [etonline] Add support for etonline.com (#12236)
++ [njpwworld] Add support for njpwworld.com (#11561)
+* [amcnetworks] Relax URL regular expression (#12127)
+
+
+version 2017.02.24.1
+
+Extractors
+* [noco] Modernize
+* [noco] Switch login URL to https (#12246)
++ [thescene] Extract more metadata
+* [thescene] Fix extraction (#12235)
++ [tubitv] Use geo bypass mechanism
+* [openload] Fix extraction (#10408)
++ [ivi] Raise GeoRestrictedError
+
+
+version 2017.02.24
+
+Core
+* [options] Hide deprecated options from --help
+* [options] Deprecate --autonumber-size
++ [YoutubeDL] Add support for string formatting operations in output template
+ (#5185, #5748, #6841, #9929, #9966, #9978, #12189)
+
+Extractors
++ [lynda:course] Add webpage extraction fallback (#12238)
+* [go] Sign all uplynk URLs and use geo bypass only for free videos
+ (#12087, #12210)
++ [skylinewebcams] Add support for skylinewebcams.com (#12221)
++ [instagram] Add support for multi video posts (#12226)
++ [crunchyroll] Extract playlist entries ids
+* [mgtv] Fix extraction
++ [sohu] Raise GeoRestrictedError
++ [leeco] Raise GeoRestrictedError and use geo bypass mechanism
+
+
+version 2017.02.22
+
+Extractors
+* [crunchyroll] Fix descriptions with double quotes (#12124)
+* [dailymotion] Make comment count optional (#12209)
++ [vidzi] Add support for vidzi.cc (#12213)
++ [24video] Add support for 24video.tube (#12217)
++ [crackle] Use geo bypass mechanism
++ [viewster] Use geo verification headers
++ [tfo] Improve geo restriction detection and use geo bypass mechanism
++ [telequebec] Use geo bypass mechanism
++ [limelight] Extract PlaylistService errors and improve geo restriction
+ detection
+
+
+version 2017.02.21
+
+Core
+* [extractor/common] Allow calling _initialize_geo_bypass from extractors
+ (#11970)
++ [adobepass] Add support for Time Warner Cable (#12191)
++ [travis] Run tests in parallel
++ [downloader/ism] Honor HTTP headers when downloading fragments
++ [downloader/dash] Honor HTTP headers when downloading fragments
++ [utils] Add GeoUtils class for working with geo tools and GeoUtils.random_ipv4
++ Add option --geo-bypass-country for explicit geo bypass on behalf of
+ specified country
++ Add options to control geo bypass mechanism --geo-bypass and --no-geo-bypass
++ Add experimental geo restriction bypass mechanism based on faking
+ X-Forwarded-For HTTP header
++ [utils] Introduce GeoRestrictedError for geo restricted videos
++ [utils] Introduce YoutubeDLError base class for all youtube-dl exceptions
+
+Extractors
++ [ninecninemedia] Use geo bypass mechanism
+* [spankbang] Make uploader optional (#12193)
++ [iprima] Improve geo restriction detection and disable geo bypass
+* [iprima] Modernize
+* [commonmistakes] Disable UnicodeBOM extractor test for python 3.2
++ [prosiebensat1] Throw ExtractorError on unsupported page type (#12180)
+* [nrk] Update _API_HOST and relax _VALID_URL
++ [tv4] Bypass geo restriction and improve detection
+* [tv4] Switch to hls3 protocol (#12177)
++ [viki] Improve geo restriction detection
++ [vgtv] Improve geo restriction detection
++ [srgssr] Improve geo restriction detection
++ [vbox7] Improve geo restriction detection and use geo bypass mechanism
++ [svt] Improve geo restriction detection and use geo bypass mechanism
++ [pbs] Improve geo restriction detection and use geo bypass mechanism
++ [ondemandkorea] Improve geo restriction detection and use geo bypass mechanism
++ [nrk] Improve geo restriction detection and use geo bypass mechanism
++ [itv] Improve geo restriction detection and use geo bypass mechanism
++ [go] Improve geo restriction detection and use geo bypass mechanism
++ [dramafever] Improve geo restriction detection and use geo bypass mechanism
+* [brightcove:legacy] Restrict videoPlayer value (#12040)
++ [tvn24] Add support for tvn24.pl and tvn24bis.pl (#11679)
++ [thisav] Add support for HTML5 media (#11771)
+* [metacafe] Bypass family filter (#10371)
+* [viceland] Improve info extraction
+
+
+version 2017.02.17
+
+Extractors
+* [heise] Improve extraction (#9725)
+* [ellentv] Improve extraction (#11653)
+* [openload] Fix extraction (#10408, #12002)
++ [theplatform] Recognize URLs with whitespaces (#12044)
+* [einthusan] Relax URL regular expression (#12141, #12159)
++ [generic] Support complex JWPlayer embedded videos (#12030)
+* [elpais] Improve extraction (#12139)
+
+
+version 2017.02.16
+
+Core
++ [utils] Add support for quoted string literals in --match-filter (#8050,
+ #12142, #12144)
+
+Extractors
+* [ceskatelevize] Lower priority for audio description sources (#12119)
+* [amcnetworks] Fix extraction (#12127)
+* [pinkbike] Fix uploader extraction (#12054)
++ [onetpl] Add support for businessinsider.com.pl and plejada.pl
++ [onetpl] Add support for onet.pl (#10507)
++ [onetmvp] Add shortcut extractor
++ [vodpl] Add support for vod.pl (#12122)
++ [pornhub] Extract video URL from tv platform site (#12007, #12129)
++ [ceskatelevize] Extract DASH formats (#12119, #12133)
+
+
+version 2017.02.14
+
+Core
+* Fix TypeError with Python 2.7.13 on Windows (#11540, #12085)
+
+Extractor
+* [zdf] Fix extraction (#12117)
+* [xtube] Fix extraction for both kinds of video id (#12088)
+* [xtube] Improve title extraction (#12088)
++ [lemonde] Fall back to delegating extraction to the generic extractor
+ (#12115, #12116)
+* [bellmedia] Allow video id longer than 6 characters (#12114)
++ [limelight] Add support for referer protected videos
+* [disney] Improve extraction (#4975, #11000, #11882, #11936)
+* [hotstar] Improve extraction (#12096)
+* [einthusan] Fix extraction (#11416)
++ [aenetworks] Add support for lifetimemovieclub.com (#12097)
+* [youtube] Fix parsing codecs (#12091)
+
+
+version 2017.02.11
+
+Core
++ [utils] Introduce get_elements_by_class and get_elements_by_attribute
+ utility functions
++ [extractor/common] Skip m3u8 manifests protected with Adobe Flash Access
+
+Extractor
+* [pluralsight:course] Fix extraction (#12075)
++ [bbc] Extract m3u8 formats with 320k audio
+* [facebook] Relax video id matching (#11017, #12055, #12056)
++ [corus] Add support for Corus Entertainment sites (#12060, #9164)
++ [pluralsight] Detect blocked account error message (#12070)
++ [bloomberg] Add another video id pattern (#12062)
+* [extractor/commonmistakes] Restrict URL regular expression (#12050)
++ [tvplayer] Add support for tvplayer.com
+
+
+version 2017.02.10
+
+Extractors
+* [xtube] Fix extraction (#12023)
+* [pornhub] Fix extraction (#12007, #12018)
+* [facebook] Improve JS data regular expression (#12042)
+* [kaltura] Improve embed partner id extraction (#12041)
++ [sprout] Add support for sproutonline.com
+* [6play] Improve extraction
++ [scrippsnetworks:watch] Add support for Scripps Networks sites (#10765)
++ [go] Add support for Adobe Pass authentication (#11468, #10831)
+* [6play] Fix extraction (#12011)
++ [nbc] Add support for Adobe Pass authentication (#12006)
+
+
+version 2017.02.07
+
+Core
+* [extractor/common] Fix audio only with audio group in m3u8 (#11995)
++ [downloader/fragment] Respect --no-part
+* [extractor/common] Speed-up HTML5 media entries extraction (#11979)
+
+Extractors
+* [pornhub] Fix extraction (#11997)
++ [canalplus] Add support for cstar.fr (#11990)
++ [extractor/generic] Improve RTMP support (#11993)
++ [gaskrank] Add support for gaskrank.tv (#11685)
+* [bandcamp] Fix extraction for incomplete albums (#11727)
+* [iwara] Fix extraction (#11781)
+* [googledrive] Fix extraction on Python 3.6
++ [videopress] Add support for videopress.com
++ [afreecatv] Extract RTMP formats
+
+
+version 2017.02.04.1
+
+Extractors
++ [twitch:stream] Add support for player.twitch.tv (#11971)
+* [radiocanada] Fix extraction for toutv rtmp formats
+
+
+version 2017.02.04
+
+Core
++ Add --playlist-random to shuffle playlists (#11889, #11901)
+* [utils] Improve comments processing in js_to_json (#11947)
+* [utils] Handle single-line comments in js_to_json
+* [downloader/external:ffmpeg] Minimize the use of aac_adtstoasc filter
+
+Extractors
++ [piksel] Add another app token pattern (#11969)
++ [vk] Capture and output author blocked error message (#11965)
++ [turner] Fix secure HLS formats downloading with ffmpeg (#11358, #11373,
+ #11800)
++ [drtv] Add support for live and radio sections (#1827, #3427)
+* [myspace] Fix extraction and extract HLS and HTTP formats
++ [youtube] Add format info for itag 325 and 328
+* [vine] Fix extraction (#11955)
+- [sportbox] Remove extractor (#11954)
++ [filmon] Add support for filmon.com (#11187)
++ [infoq] Add audio only formats (#11565)
+* [douyutv] Improve room id regular expression (#11931)
+* [iprima] Fix extraction (#11920, #11896)
+* [youtube] Fix ytsearch when cookies are provided (#11924)
+* [go] Relax video id regular expression (#11937)
+* [facebook] Fix title extraction (#11941)
++ [youtube:playlist] Recognize TL playlists (#11945)
++ [bilibili] Support new Bangumi URLs (#11845)
++ [cbc:watch] Extract audio codec for audio only formats (#11893)
++ [elpais] Fix extraction for some URLs (#11765)
+
+
+version 2017.02.01
+
+Extractors
++ [facebook] Add another fallback extraction scenario (#11926)
+* [prosiebensat1] Fix extraction of descriptions (#11810, #11929)
+- [crunchyroll] Remove ScaledBorderAndShadow settings (#9028)
++ [vimeo] Extract upload timestamp
++ [vimeo] Extract license (#8726, #11880)
++ [nrk:series] Add support for series (#11571, #11711)
+
+
+version 2017.01.31
+
+Core
++ [compat] Add compat_etree_register_namespace
+
+Extractors
+* [youtube] Fix extraction for domainless player URLs (#11890, #11891, #11892,
+ #11894, #11895, #11897, #11900, #11903, #11904, #11906, #11907, #11909,
+ #11913, #11914, #11915, #11916, #11917, #11918, #11919)
++ [vimeo] Extract both mixed and separated DASH formats
++ [ruutu] Extract DASH formats
+* [itv] Fix extraction for python 2.6
+
+
+version 2017.01.29
+
+Core
+* [extractor/common] Fix initialization template (#11605, #11825)
++ [extractor/common] Document fragment_base_url and fragment's path fields
+* [extractor/common] Fix duration per DASH segment (#11868)
++ Introduce --autonumber-start option for initial value of %(autonumber)s
+ template (#727, #2702, #9362, #10457, #10529, #11862)
+
+Extractors
++ [azmedien:playlist] Add support for topic and themen playlists (#11817)
+* [npo] Fix subtitles extraction
++ [itv] Extract subtitles
++ [itv] Add support for itv.com (#9240)
++ [mtv81] Add support for mtv81.com (#7619)
++ [vlive] Add support for channels (#11826)
++ [kaltura] Add fallback for fileExt
++ [kaltura] Improve uploader_id extraction
++ [konserthusetplay] Add support for rspoplay.se (#11828)
+
+
+version 2017.01.28
+
+Core
+* [utils] Improve parse_duration
+
+Extractors
+* [crunchyroll] Improve series and season metadata extraction (#11832)
+* [soundcloud] Improve formats extraction and extract audio bitrate
++ [soundcloud] Extract HLS formats
+* [soundcloud] Fix track URL extraction (#11852)
++ [twitch:vod] Expand URL regular expressions (#11846)
+* [aenetworks] Fix season episodes extraction (#11669)
++ [tva] Add support for videos.tva.ca (#11842)
+* [jamendo] Improve and extract more metadata (#11836)
++ [disney] Add support for Disney sites (#7409, #11801, #4975, #11000)
+* [vevo] Remove request to old API and catch API v2 errors
++ [cmt,mtv,southpark] Add support for episode URLs (#11837)
++ [youtube] Add fallback for duration extraction (#11841)
+
+
+version 2017.01.25
+
+Extractors
++ [openload] Fallback video extension to mp4
++ [extractor/generic] Add support for Openload embeds (#11536, #11812)
+* [srgssr] Fix rts video extraction (#11831)
++ [afreecatv:global] Add support for afreeca.tv (#11807)
++ [crackle] Extract vtt subtitles
++ [crackle] Extract multiple resolutions for thumbnails
++ [crackle] Add support for mobile URLs
++ [konserthusetplay] Extract subtitles (#11823)
++ [konserthusetplay] Add support for HLS videos (#11823)
+* [vimeo:review] Fix config URL extraction (#11821)
+
+
+version 2017.01.24
+
+Extractors
+* [pluralsight] Fix extraction (#11820)
++ [nextmedia] Add support for NextTV (壹電視)
+* [24video] Fix extraction (#11811)
+* [youtube:playlist] Fix nonexistent and private playlist detection (#11604)
++ [chirbit] Extract uploader (#11809)
+
+
+version 2017.01.22
+
+Extractors
++ [pornflip] Add support for pornflip.com (#11556, #11795)
+* [chaturbate] Fix extraction (#11797, #11802)
++ [azmedien] Add support for AZ Medien sites (#11784, #11785)
++ [nextmedia] Support redirected URLs
++ [vimeo:channel] Extract videos' titles for playlist entries (#11796)
++ [youtube] Extract episode metadata (#9695, #11774)
++ [cspan] Support Ustream embedded videos (#11547)
++ [1tv] Add support for HLS videos (#11786)
+* [uol] Fix extraction (#11770)
+* [mtv] Relax triforce feed regular expression (#11766)
+
+
+version 2017.01.18
+
+Extractors
+* [bilibili] Fix extraction (#11077)
++ [canalplus] Add fallback for video id (#11764)
+* [20min] Fix extraction (#11683, #11751)
+* [imdb] Extend URL regular expression (#11744)
++ [naver] Add support for tv.naver.com links (#11743)
+
+
+version 2017.01.16
+
+Core
+* [options] Apply custom config to final composite configuration (#11741)
+* [YoutubeDL] Improve protocol auto determining (#11720)
+
+Extractors
+* [xiami] Relax URL regular expressions
+* [xiami] Improve track metadata extraction (#11699)
++ [limelight] Check hand-made direct HTTP links
++ [limelight] Add support for direct HTTP links at video.llnw.net (#11737)
++ [brightcove] Recognize another player ID pattern (#11688)
++ [niconico] Support login via cookies (#7968)
+* [yourupload] Fix extraction (#11601)
++ [beam:live] Add support for beam.pro live streams (#10702, #11596)
+* [vevo] Improve geo restriction detection
++ [dramafever] Add support for URLs with language code (#11714)
+* [cbc] Improve playlist support (#11704)
+
+
+version 2017.01.14
+
+Core
++ [common] Add ability to customize akamai manifest host
++ [utils] Add more date formats
+
+Extractors
+- [mtv] Eliminate _transform_rtmp_url
+* [mtv] Generalize triforce mgid extraction
++ [cmt] Add support for full episodes and video clips (#11623)
++ [mitele] Extract DASH formats
++ [ooyala] Add support for videos with embedToken (#11684)
+* [mixcloud] Fix extraction (#11674)
+* [openload] Fix extraction (#10408)
+* [tv4] Improve extraction (#11698)
+* [freesound] Fix and improve extraction (#11602)
++ [nick] Add support for beta.nick.com (#11655)
+* [mtv,cc] Use HLS by default with native HLS downloader (#11641)
+* [mtv] Fix non-HLS extraction
+
+
+version 2017.01.10
+
+Extractors
+* [youtube] Fix extraction (#11663, #11664)
++ [inc] Add support for inc.com (#11277, #11647)
++ [youtube] Add itag 212 (#11575)
++ [egghead:course] Add support for egghead.io courses
+
+
+version 2017.01.08
+
+Core
+* Fix "invalid escape sequence" errors under Python 3.6 (#11581)
+
+Extractors
++ [hitrecord] Add support for hitrecord.org (#10867, #11626)
+- [videott] Remove extractor
+* [swrmediathek] Improve extraction
+- [sharesix] Remove extractor
+- [aol:features] Remove extractor
+* [sendtonews] Improve info extraction
+* [3sat,phoenix] Fix extraction (#11619)
+* [comedycentral/mtv] Add support for HLS videos (#11600)
+* [discoverygo] Fix JSON data parsing (#11219, #11522)
+
+
+version 2017.01.05
+
+Extractors
++ [zdf] Fix extraction (#11055, #11063)
+* [pornhub:playlist] Improve extraction (#11594)
++ [cctv] Add support for ncpa-classic.com (#11591)
++ [tunein] Add support for embeds (#11579)
+
+
+version 2017.01.02
+
+Extractors
+* [cctv] Improve extraction (#879, #6753, #8541)
++ [nrktv:episodes] Add support for episodes (#11571)
++ [arkena] Add support for video.arkena.com (#11568)
+
+
+version 2016.12.31
+
+Core
++ Introduce --config-location option for custom configuration files (#6745,
+ #10648)
+
+Extractors
++ [twitch] Add support for player.twitch.tv (#11535, #11537)
++ [videa] Add support for videa.hu (#8181, #11133)
+* [vk] Fix postlive videos extraction
+* [vk] Extract from playerParams (#11555)
+- [freevideo] Remove extractor (#11515)
++ [showroomlive] Add support for showroom-live.com (#11458)
+* [xhamster] Fix duration extraction (#11549)
+* [rtve:live] Fix extraction (#11529)
+* [brightcove:legacy] Improve embeds detection (#11523)
++ [twitch] Add support for rechat messages (#11524)
+* [acast] Fix audio and timestamp extraction (#11521)
+
+
+version 2016.12.22
+
+Core
+* [extractor/common] Improve detection of video-only formats in m3u8
+ manifests (#11507)
+
+Extractors
++ [theplatform] Pass geo verification headers to SMIL request (#10146)
++ [viu] Pass geo verification headers to auth request
+* [rtl2] Extract more formats and metadata
+* [vbox7] Skip malformed JSON-LD (#11501)
+* [uplynk] Force downloading using native HLS downloader (#11496)
++ [laola1] Add support for another extraction scenario (#11460)
+
+
+version 2016.12.20
+
+Core
+* [extractor/common] Improve fragment URL construction for DASH media
+* [extractor/common] Fix codec information extraction for mixed audio/video
+ DASH media (#11490)
+
+Extractors
+* [vbox7] Fix extraction (#11494)
++ [uktvplay] Add support for uktvplay.uktv.co.uk (#11027)
++ [piksel] Add support for player.piksel.com (#11246)
++ [vimeo] Add support for DASH formats
+* [vimeo] Fix extraction for HLS formats (#11490)
+* [kaltura] Fix wrong widget ID in some cases (#11480)
++ [nrktv:direkte] Add support for live streams (#11488)
+* [pbs] Fix extraction for geo restricted videos (#7095)
+* [brightcove:new] Skip widevine classic videos
++ [viu] Add support for viu.com (#10607, #11329)
+
+
+version 2016.12.18
+
+Core
++ [extractor/common] Recognize DASH formats in html5 media entries
+
+Extractors
++ [ccma] Add support for ccma.cat (#11359)
+* [laola1tv] Improve extraction
++ [laola1tv] Add support for embed URLs (#11460)
+* [nbc] Fix extraction for MSNBC videos (#11466)
+* [twitch] Adapt to new videos pages URL schema (#11469)
++ [meipai] Add support for meipai.com (#10718)
+* [jwplatform] Improve subtitles and duration extraction
++ [ondemandkorea] Add support for ondemandkorea.com (#10772)
++ [vvvvid] Add support for vvvvid.it (#5915)
+
+
+version 2016.12.15
+
+Core
++ [utils] Add convenience urljoin
+
+Extractors
++ [openload] Recognize oload.tv URLs (#10408)
++ [facebook] Recognize .onion URLs (#11443)
+* [vlive] Fix extraction (#11375, #11383)
++ [canvas] Extract DASH formats
++ [melonvod] Add support for vod.melon.com (#11419)
+
+
+version 2016.12.12
+
+Core
++ [utils] Add common user agents map
++ [common] Recognize HLS manifests that contain video only formats (#11394)
+
+Extractors
++ [dplay] Use Safari user agent for HLS (#11418)
++ [facebook] Detect login required error message
+* [facebook] Improve video selection (#11390)
++ [canalplus] Add another video id pattern (#11399)
+* [mixcloud] Relax URL regular expression (#11406)
+* [ctvnews] Relax URL regular expression (#11394)
++ [rte] Capture and output error message (#7746, #10498)
++ [prosiebensat1] Add support for DASH formats
+* [srgssr] Improve extraction for geo restricted videos (#11089)
+* [rts] Improve extraction for geo restricted videos (#4989)
+
+
+version 2016.12.09
+
+Core
+* [socks] Fix error reporting (#11355)
+
+Extractors
+* [openload] Fix extraction (#10408)
+* [pandoratv] Fix extraction (#11023)
++ [telebruxelles] Add support for emission URLs
+* [telebruxelles] Extract all formats
++ [bloomberg] Add another video id regular expression (#11371)
+* [fusion] Update ooyala id regular expression (#11364)
++ [1tv] Add support for playlists (#11335)
+* [1tv] Improve extraction (#11335)
++ [aenetworks] Extract more formats (#11321)
++ [thisoldhouse] Recognize /tv-episode/ URLs (#11271)
+
+
+version 2016.12.01
+
+Extractors
+* [soundcloud] Update client id (#11327)
+* [ruutu] Detect DRM protected videos
++ [liveleak] Add support for youtube embeds (#10688)
+* [spike] Fix full episodes support (#11312)
+* [comedycentral] Fix full episodes support
+* [normalboots] Rewrite in terms of JWPlatform (#11184)
+* [teamfourstar] Rewrite in terms of JWPlatform (#11184)
+- [screenwavemedia] Remove extractor (#11184)
+
+
+version 2016.11.27
+
+Extractors
++ [webcaster] Add support for webcaster.pro
++ [azubu] Add support for azubu.uol.com.br (#11305)
+* [viki] Prefer hls formats
+* [viki] Fix rtmp formats extraction (#11255)
+* [puls4] Relax URL regular expression (#11267)
+* [vevo] Improve artist extraction (#10911)
+* [mitele] Relax URL regular expression and extract more metadata (#11244)
++ [cbslocal] Recognize New York site (#11285)
++ [youtube:playlist] Pass disable_polymer in URL query (#11193)
+
+
+version 2016.11.22
+
+Extractors
+* [hellporno] Fix video extension extraction (#11247)
++ [hellporno] Add support for hellporno.net (#11247)
++ [amcnetworks] Recognize more BBC America URLs (#11263)
+* [funnyordie] Improve extraction (#11208)
+* [extractor/generic] Improve limelight embeds support
+- [crunchyroll] Remove ScaledBorderAndShadow from ASS subtitles (#8207, #9028)
+* [bandcamp] Fix free downloads extraction and extract all formats (#11067)
+* [twitter:card] Relax URL regular expression (#11225)
++ [tvanouvelles] Add support for tvanouvelles.ca (#10616)
+
+
+version 2016.11.18
+
+Extractors
+* [youtube:live] Relax URL regular expression (#11164)
+* [openload] Fix extraction (#10408, #11122)
+* [vlive] Prefer locale over language for subtitles id (#11203)
+
+
+version 2016.11.14.1
+
+Core
++ [downloader/fragment,f4m,hls] Respect HTTP headers from info dict
+* [extractor/common] Fix media templates with Bandwidth substitution pattern in
+ MPD manifests (#11175)
+* [extractor/common] Improve thumbnail extraction from JSON-LD
+
+Extractors
++ [nrk] Workaround geo restriction
++ [nrk] Improve error detection and messages
++ [afreecatv] Add support for vod.afreecatv.com (#11174)
+* [cda] Fix and improve extraction (#10929, #10936)
+* [plays] Fix extraction (#11165)
+* [eagleplatform] Fix extraction (#11160)
++ [audioboom] Recognize /posts/ URLs (#11149)
+
+
+version 2016.11.08.1
+
+Extractors
+* [espn:article] Fix support for espn.com articles
+* [franceculture] Fix extraction (#11140)
+
+
+version 2016.11.08
+
+Extractors
+* [tmz:article] Fix extraction (#11052)
+* [espn] Fix extraction (#11041)
+* [mitele] Fix extraction after website redesign (#10824)
+- [ard] Remove age restriction check (#11129)
+* [generic] Improve support for pornhub.com embeds (#11100)
++ [generic] Add support for redtube.com embeds (#11099)
++ [generic] Add support for drtuber.com embeds (#11098)
++ [redtube] Add support for embed URLs
++ [drtuber] Add support for embed URLs
++ [yahoo] Improve content id extraction (#11088)
+* [toutv] Relax URL regular expression (#11121)
+
+
+version 2016.11.04
+
+Core
+* [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8
+ manifests (#11113)
+* [downloader/ism] Fix AVC Decoder Configuration Record
+
+Extractors
++ [fox9] Add support for fox9.com (#11110)
++ [anvato] Extract more metadata and improve formats extraction
+* [vodlocker] Improve removed videos detection (#11106)
++ [vzaar] Add support for vzaar.com (#11093)
++ [vice] Add support for uplynk preplay videos (#11101)
+* [tubitv] Fix extraction (#11061)
++ [shahid] Add support for authentication (#11091)
++ [radiocanada] Add subtitles support (#11096)
++ [generic] Add support for ISM manifests
+
+
+version 2016.11.02
+
+Core
++ Add basic support for Smooth Streaming protocol (#8118, #10969)
+* Improve MPD manifest base URL extraction (#10909, #11079)
+* Fix --match-filter for int-like strings (#11082)
+
+Extractors
++ [mva] Add support for ISM formats
++ [msn] Add support for ISM formats
++ [onet] Add support for ISM formats
++ [tvp] Add support for ISM formats
++ [nicknight] Add support for nicknight sites (#10769)
+
+
+version 2016.10.30
+
+Extractors
+* [facebook] Improve 1080P video detection (#11073)
+* [imgur] Recognize /r/ URLs (#11071)
+* [beeg] Fix extraction (#11069)
+* [openload] Fix extraction (#10408)
+* [gvsearch] Modernize and fix search request (#11051)
+* [adultswim] Fix extraction (#10979)
++ [nobelprize] Add support for nobelprize.org (#9999)
+* [hornbunny] Fix extraction (#10981)
+* [tvp] Improve video id extraction (#10585)
+
+
+version 2016.10.26
+
+Extractors
++ [rentv] Add support for ren.tv (#10620)
++ [ard] Detect unavailable videos (#11018)
+* [vk] Fix extraction (#11022)
+
+
+version 2016.10.25
+
+Core
+* Fix running youtube-dl in the background (#955, #10706, #10996)
+
+Extractors
++ [jamendo] Add support for jamendo.com (#10132, #10736)
++ [pandatv] Add support for panda.tv (#10736)
++ [dotsub] Support Vimeo embed (#10964)
+* [litv] Fix extraction
++ [vimeo] Delegate ondemand redirects to ondemand extractor (#10994)
+* [vivo] Fix extraction (#11003)
++ [twitch:stream] Add support for rebroadcasts (#10995)
+* [pluralsight] Fix subtitles conversion (#10990)
+
+
+version 2016.10.21.1
+
+Extractors
++ [pluralsight] Process all clip URLs (#10984)
+
+
+version 2016.10.21
+
+Core
+- Disable thumbnail embedding in mkv
++ Add support for Comcast multiple-system operator (#10819)
+
+Extractors
+* [pluralsight] Adapt to new API (#10972)
+* [openload] Fix extraction (#10408, #10971)
++ [natgeo] Extract m3u8 formats (#10959)
+
+
+version 2016.10.19
+
+Core
++ [utils] Expose PACKED_CODES_RE
++ [extractor/common] Extract non-SMIL Wowza MPD manifests
++ [extractor/common] Detect f4m audio-only formats
+
+Extractors
+* [vidzi] Fix extraction (#10908, #10952)
+* [urplay] Fix subtitles extraction
++ [urplay] Add support for urskola.se (#10915)
++ [orf] Add subtitles support (#10939)
+* [youtube] Fix --no-playlist behavior for youtu.be/id URLs (#10896)
+* [nrk] Relax URL regular expression (#10928)
++ [nytimes] Add support for podcasts (#10926)
+* [pluralsight] Relax URL regular expression (#10941)
+
+
+version 2016.10.16
+
+Core
+* [postprocessor/ffmpeg] Return correct filepath and ext in updated information
+ in FFmpegExtractAudioPP (#10879)
+
+Extractors
++ [ruutu] Add support for supla.fi (#10849)
++ [theoperaplatform] Add support for theoperaplatform.eu (#10914)
+* [lynda] Fix height for prioritized streams
++ [lynda] Add fallback extraction scenario
+* [lynda] Switch to https (#10916)
++ [huajiao] New extractor (#10917)
+* [cmt] Fix mgid extraction (#10813)
++ [safari:course] Add support for techbus.safaribooksonline.com
+* [orf:tvthek] Fix extraction and modernize (#10898)
+* [chirbit] Fix extraction of user profile pages
+* [carambatv] Fix extraction
+* [canalplus] Fix extraction for some videos
+* [cbsinteractive] Fix extraction for cnet.com
+* [parliamentliveuk] Recognize lowercase URLs (#10912)
+
+
+version 2016.10.12
+
+Core
++ Support HTML media elements without child nodes
+* [Makefile] Fix support for GNU make < 4; drop BSD make support (#9387)
+
+Extractors
+* [dailymotion] Fix extraction (#10901)
+* [vimeo:review] Fix extraction (#10900)
+* [nhl] Correctly handle invalid formats (#10713)
+* [footyroom] Fix extraction (#10810)
+* [abc.net.au:iview] Fix for standalone (non series) videos (#10895)
++ [hbo] Add support for episode pages (#10892)
+* [allocine] Fix extraction (#10860)
++ [nextmedia] Recognize action news on AppleDaily
+* [lego] Improve info extraction and bypass geo restriction (#10872)
+
+
+version 2016.10.07
+
+Extractors
++ [iprima] Detect geo restriction
+* [facebook] Fix video extraction (#10846)
++ [commonprotocols] Support direct MMS links (#10838)
++ [generic] Add support for multiple vimeo embeds (#10862)
++ [nzz] Add support for nzz.ch (#4407)
++ [npo] Detect geo restriction
++ [npo] Add support for 2doc.nl (#10842)
++ [lego] Add support for lego.com (#10369)
++ [tonline] Add support for t-online.de (#10376)
+* [techtalks] Relax URL regular expression (#10840)
+* [youtube:live] Extend URL regular expression (#10839)
++ [theweatherchannel] Add support for weather.com (#7188)
++ [thisoldhouse] Add support for thisoldhouse.com (#10837)
++ [nhl] Add support for wch2016.com (#10833)
+* [pornoxo] Use JWPlatform to improve metadata extraction
+
+
+version 2016.10.02
+
+Core
+* Fix possibly lost extended attributes during post-processing
++ Support pyxattr as well as python-xattr for --xattrs and
+ --xattr-set-filesize (#9054)
+
+Extractors
++ [jwplatform] Support DASH streams in JWPlayer
++ [jwplatform] Support old-style JWPlayer playlists
++ [byutv:event] Add extractor
+* [periscope:user] Fix extraction (#10820)
+* [dctp] Fix extraction (#10734)
++ [instagram] Extract video dimensions (#10790)
++ [tvland] Extend URL regular expression (#10812)
++ [vgtv] Add support for tv.aftonbladet.se (#10800)
+- [aftonbladet] Remove extractor
+* [vk] Fix timestamp and view count extraction (#10760)
++ [vk] Add support for running and finished live streams (#10799)
++ [leeco] Recognize more Le Sports URLs (#10794)
++ [instagram] Extract comments (#10788)
++ [ketnet] Extract mzsource formats (#10770)
+* [limelight:media] Improve HTTP formats extraction
+
+
+version 2016.09.27
+
+Core
++ Add hdcore query parameter to akamai f4m formats
++ Delegate HLS live streams downloading to ffmpeg
++ Improve support for HTML5 subtitles
+
+Extractors
++ [vk] Add support for dailymotion embeds (#10661)
+* [promptfile] Fix extraction (#10634)
+* [kaltura] Speed up embed regular expressions (#10764)
++ [npo] Add support for anderetijden.nl (#10754)
++ [prosiebensat1] Add support for advopedia sites
+* [mwave] Relax URL regular expression (#10735, #10748)
+* [prosiebensat1] Fix playlist support (#10745)
++ [prosiebensat1] Add support for sat1gold sites (#10745)
++ [cbsnews:livevideo] Fix extraction and extract m3u8 formats
++ [brightcove:new] Add support for live streams
+* [soundcloud] Generalize playlist entries extraction (#10733)
++ [mtv] Add support for new URL schema (#8169, #9808)
+* [einthusan] Fix extraction (#10714)
++ [twitter] Support Periscope embeds (#10737)
++ [openload] Support subtitles (#10625)
+
+
+version 2016.09.24
+
+Core
++ Add support for watchTVeverywhere.com authentication provider based MSOs for
+ Adobe Pass authentication (#10709)
+
+Extractors
++ [soundcloud:playlist] Provide video id for early playlist entries (#10733)
++ [prosiebensat1] Add support for kabeleinsdoku (#10732)
+* [cbs] Extract info from thunder videoPlayerService (#10728)
+* [openload] Fix extraction (#10408)
++ [ustream] Support the new HLS streams (#10698)
++ [ooyala] Extract all HLS formats
++ [cartoonnetwork] Add support for Adobe Pass authentication
++ [soundcloud] Extract license metadata
++ [fox] Add support for Adobe Pass authentication (#8584)
++ [tbs] Add support for Adobe Pass authentication (#10642, #10222)
++ [trutv] Add support for Adobe Pass authentication (#10519)
++ [turner] Add support for Adobe Pass authentication
+
+
+version 2016.09.19
+
+Extractors
++ [crunchyroll] Check if already authenticated (#10700)
+- [twitch:stream] Remove fallback to profile extraction when stream is offline
+* [thisav] Improve title extraction (#10682)
+* [vyborymos] Improve station info extraction
+
+
+version 2016.09.18
+
+Core
++ Introduce manifest_url and fragments fields in formats dictionary for
+ fragmented media
++ Provide manifest_url field for DASH segments, HLS and HDS
++ Provide fragments field for DASH segments
+* Rework DASH segments downloader to use fragments field
++ Add helper method for Wowza Streaming Engine formats extraction
+
+Extractors
++ [vyborymos] Add extractor for vybory.mos.ru (#10692)
++ [xfileshare] Add title regular expression for streamin.to (#10646)
++ [globo:article] Add support for multiple videos (#10653)
++ [thisav] Recognize HTML5 videos (#10447)
+* [jwplatform] Improve JWPlayer detection
++ [mangomolo] Add support for Mangomolo embeds
++ [toutv] Add support for authentication (#10669)
+* [franceinter] Fix upload date extraction
+* [tv4] Fix HLS and HDS formats extraction (#10659)
+
+
+version 2016.09.15
+
+Core
+* Improve _hidden_inputs
++ Introduce improved explicit Adobe Pass support
++ Add --ap-mso to provide multiple-system operator identifier
++ Add --ap-username to provide MSO account username
++ Add --ap-password to provide MSO account password
++ Add --ap-list-mso to list all supported MSOs
++ Add support for Rogers Cable multiple-system operator (#10606)
+
+Extractors
+* [crunchyroll] Fix authentication (#10655)
+* [twitch] Fix API calls (#10654, #10660)
++ [bellmedia] Add support for more Bell Media Television sites
+* [franceinter] Fix extraction (#10538, #2105)
+* [kuwo] Improve error detection (#10650)
++ [go] Add support for free full episodes (#10439)
+* [bilibili] Fix extraction for specific videos (#10647)
+* [nhk] Fix extraction (#10633)
+* [kaltura] Improve audio detection
+* [kaltura] Skip chun format
++ [vimeo:ondemand] Pass Referer along with embed URL (#10624)
++ [nbc] Add support for NBC Olympics (#10361)
+
+
+version 2016.09.11.1
+
+Extractors
++ [tube8] Extract categories and tags (#10579)
++ [pornhub] Extract categories and tags (#10499)
+* [openload] Temporary fix (#10408)
++ [foxnews] Add support for Fox News articles (#10598)
+* [viafree] Improve video id extraction (#10615)
+* [iwara] Fix extraction after relaunch (#10462, #3215)
++ [tfo] Add extractor for tfo.org
+* [lrt] Fix audio extraction (#10566)
+* [9now] Fix extraction (#10561)
++ [canalplus] Add support for c8.fr (#10577)
+* [newgrounds] Fix uploader extraction (#10584)
++ [polskieradio:category] Add support for category lists (#10576)
++ [ketnet] Add extractor for ketnet.be (#10343)
++ [canvas] Add support for een.be (#10605)
++ [telequebec] Add extractor for telequebec.tv (#1999)
+* [parliamentliveuk] Fix extraction (#9137)
+
+
+version 2016.09.08
+
+Extractors
++ [jwplatform] Extract height from format label
++ [yahoo] Extract Brightcove Legacy Studio embeds (#9345)
+* [videomore] Fix extraction (#10592)
+* [foxgay] Fix extraction (#10480)
++ [rmcdecouverte] Add extractor for rmcdecouverte.bfmtv.com (#9709)
+* [gamestar] Fix metadata extraction (#10479)
+* [puls4] Fix extraction (#10583)
++ [cctv] Add extractor for CCTV and CNTV (#8153)
++ [lci] Add extractor for lci.fr (#10573)
++ [wat] Extract DASH formats
++ [viafree] Improve video id detection (#10569)
++ [trutv] Add extractor for trutv.com (#10519)
++ [nick] Add support for nickelodeon.nl (#10559)
++ [abcotvs:clips] Add support for clips.abcotvs.com
++ [abcotvs] Add support for ABC Owned Television Stations sites (#9551)
++ [miaopai] Add extractor for miaopai.com (#10556)
++ [bilibili] Add support for episodes (#10190)
++ [tvnoe] Add extractor for tvnoe.cz (#10524)
+
+
+version 2016.09.04.1
+
+Core
+* In the DASH downloader, if the first segment fails, abort the whole
+ download process to prevent throttling (#10497)
++ Add support for --skip-unavailable-fragments and --fragment-retries in
+ hlsnative downloader (#10165, #10448)
++ Add support for --skip-unavailable-fragments in DASH downloader
++ Introduce --skip-unavailable-fragments option for fragment-based
+ downloaders that allows skipping fragments unavailable due to an HTTP error
+* Fix extraction of video/audio entries with src attribute in
+ _parse_html5_media_entries (#10540)
+
+Extractors
+* [theplatform] Relax URL regular expression (#10546)
+* [youtube:playlist] Extend URL regular expression
+* [rottentomatoes] Delegate extraction to internetvideoarchive extractor
+* [internetvideoarchive] Extract all formats
+* [pornvoisines] Fix extraction (#10469)
+* [rottentomatoes] Fix extraction (#10467)
+* [espn] Extend URL regular expression (#10549)
+* [vimple] Extend URL regular expression (#10547)
+* [youtube:watchlater] Fix extraction (#10544)
+* [youjizz] Fix extraction (#10437)
++ [foxnews] Add support for FoxNews Insider (#10445)
++ [fc2] Recognize Flash player URLs (#10512)
+
+
+version 2016.09.03
+
+Core
+* Restore usage of NAME attribute from EXT-X-MEDIA tag for format codes in
+ _extract_m3u8_formats (#10522)
+* Handle semicolon in mimetype2ext
+
+Extractors
++ [youtube] Add support for rental videos' previews (#10532)
+* [youtube:playlist] Fallback to video extraction for video/playlist URLs when
+ no playlist is actually served (#10537)
++ [drtv] Add support for dr.dk/nyheder (#10536)
++ [facebook:plugins:video] Add extractor (#10530)
++ [go] Add extractor for *.go.com sites
+* [adobepass] Check for authz_token expiration (#10527)
+* [nytimes] Improve extraction
+* [thestar] Fix extraction (#10465)
+* [glide] Fix extraction (#10478)
+- [exfm] Remove extractor (#10482)
+* [youporn] Fix categories and tags extraction (#10521)
++ [curiositystream] Add extractor for app.curiositystream.com
+- [thvideo] Remove extractor (#10464)
+* [movingimage] Adapt to the new site name (#10466)
++ [cbs] Add support for once formats (#10515)
+* [limelight] Skip ISM and duplicate manifests
++ [porncom] Extract categories and tags (#10510)
++ [facebook] Extract timestamp (#10508)
++ [yahoo] Extract more formats
+
+
+version 2016.08.31
+
+Extractors
+* [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505)
+* [bandcamp:album] Fix title extraction (#10455)
+* [pyvideo] Fix extraction (#10468)
++ [ctv] Add support for tsn.ca, bnn.ca and thecomedynetwork.ca (#10016)
+* [9c9media] Extract more metadata
+* [9c9media] Fix multiple stacks extraction (#10016)
+* [adultswim] Improve video info extraction (#10492)
+* [vodplatform] Improve embed regular expression
+- [played] Remove extractor (#10470)
++ [tbs] Add extractor for tbs.com and tntdrama.com (#10222)
++ [cartoonnetwork] Add extractor for cartoonnetwork.com (#10110)
+* [adultswim] Rework in terms of turner extractor
+* [cnn] Rework in terms of turner extractor
+* [nba] Rework in terms of turner extractor
++ [turner] Add base extractor for Turner Broadcasting System based sites
+* [bilibili] Fix extraction (#10375)
+* [openload] Fix extraction (#10408)
+
+
+version 2016.08.28
+
+Core
++ Add warning message that ffmpeg doesn't support SOCKS
+* Improve thumbnail sorting
++ Extract formats from #EXT-X-MEDIA tags in _extract_m3u8_formats
+* Fill IV with leading zeros for IVs shorter than 16 octets in hlsnative
++ Add ac-3 to the list of audio codecs in parse_codecs
+
+Extractors
+* [periscope:user] Fix extraction (#10453)
+* [douyutv] Fix extraction (#10153, #10318, #10444)
++ [nhk:vod] Add extractor for www3.nhk.or.jp on demand (#4437, #10424)
+- [trutube] Remove extractor (#10438)
++ [usanetwork] Add extractor for usanetwork.com
+* [crackle] Fix extraction (#10333)
+* [spankbang] Fix description and uploader extraction (#10339)
+* [discoverygo] Detect cable provider restricted videos (#10425)
++ [cbc] Add support for watch.cbc.ca
+* [kickstarter] Silence the warning for og:description (#10415)
+* [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363)
+
+
+version 2016.08.24.1
+
+Extractors
++ [pluralsight] Add support for subtitles (#9681)
+
+
+version 2016.08.24
+
+Extractors
+* [youtube] Fix authentication (#10392)
+* [openload] Fix extraction (#10408)
++ [bravotv] Add support for Adobe Pass (#10407)
+* [bravotv] Fix clip info extraction (#10407)
+* [eagleplatform] Improve embedded videos detection (#10409)
+* [awaan] Fix extraction
+* [mtvservices:embedded] Update config URL
++ [abc:iview] Add extractor (#6148)
+
+
+version 2016.08.22
+
+Core
+* Improve automatic extension calculation for formats and subtitles
++ Recognize full unit names in parse_filesize
++ Add support for m3u8 manifests in HTML5 multimedia tags
+* Fix octal/hexadecimal number detection in js_to_json
+
+Extractors
++ [ivi] Add support for 720p and 1080p
++ [charlierose] Add new extractor (#10382)
+* [1tv] Fix extraction (#9249)
+* [twitch] Renew authentication
+* [kaltura] Improve subtitles extension calculation
++ [zingmp3] Add support for video clips
+* [zingmp3] Fix extraction (#10041)
+* [kaltura] Improve subtitles extraction (#10279)
+* [cultureunplugged] Fix extraction (#10330)
++ [cnn] Add support for money.cnn.com (#2797)
+* [cbsnews] Fix extraction (#10362)
+* [cbs] Fix extraction (#10393)
++ [litv] Support 'promo' URLs (#10385)
+* [snotr] Fix extraction (#10338)
+* [n-tv.de] Fix extraction (#10331)
+* [globo:article] Relax URL and video id regular expressions (#10379)
+
+
+version 2016.08.19
+
+Core
+- Remove output template description from --help
+* Recognize lowercase units in parse_filesize
+
+Extractors
++ [porncom] Add extractor for porn.com (#2251, #10251)
++ [generic] Add support for DBTV embeds
+* [vk:wallpost] Fix audio extraction for new site layout
+* [vk] Fix authentication
++ [hgtvcom:show] Add extractor for hgtv.com shows (#10365)
++ [discoverygo] Add support for other GO network sites
+
+
+version 2016.08.17
+
+Core
++ Add _get_netrc_login_info
+
+Extractors
+* [mofosex] Extract all formats (#10335)
++ [generic] Add support for vbox7 embeds
++ [vbox7] Add support for embed URLs
++ [viafree] Add extractor (#10358)
++ [mtg] Add support for viafree URLs (#10358)
+* [theplatform] Extract all subtitles per language
++ [xvideos] Fix HLS extraction (#10356)
++ [amcnetworks] Add extractor
++ [bbc:playlist] Add support for pagination (#10349)
++ [fxnetworks] Add extractor (#9462)
+* [cbslocal] Fix extraction for SendtoNews-based videos
+* [sendtonews] Fix extraction
+* [jwplatform] Extract video id from JWPlayer data
+- [zippcast] Remove extractor (#10332)
++ [viceland] Add extractor (#8799)
++ [adobepass] Add base extractor for Adobe Pass Authentication
+* [life:embed] Improve extraction
+* [vgtv] Detect geo restricted videos (#10348)
++ [uplynk] Add extractor
+* [xiami] Fix extraction (#10342)
+
+
+version 2016.08.13
+
+Core
+* Show progress for curl external downloader
+* Forward more options to curl external downloader
+
+Extractors
+* [pbs] Fix description extraction
+* [franceculture] Fix extraction (#10324)
+* [pornotube] Fix extraction (#10322)
+* [4tube] Fix metadata extraction (#10321)
+* [imgur] Fix width and height extraction (#10325)
+* [expotv] Improve extraction
++ [vbox7] Fix extraction (#10309)
+- [tapely] Remove extractor (#10323)
+* [muenchentv] Fix extraction (#10313)
++ [24video] Add support for .me and .xxx TLDs
+* [24video] Fix comment count extraction
+* [sunporno] Add support for embed URLs
+* [sunporno] Fix metadata extraction (#10316)
++ [hgtv] Add extractor for hgtv.ca (#3999)
+- [pbs] Remove request to unavailable API
++ [pbs] Add support for high quality HTTP formats
++ [crunchyroll] Add support for HLS formats (#10301)
+
+
+version 2016.08.12
+
+Core
+* Subtitles are now written as is. Newline conversions are disabled. (#10268)
++ Recognize more formats in unified_timestamp
+
+Extractors
+- [goldenmoustache] Remove extractor (#10298)
+* [drtuber] Improve title extraction
+* [drtuber] Make dislike count optional (#10297)
+* [chirbit] Fix extraction (#10296)
+* [francetvinfo] Relax URL regular expression
+* [rtlnl] Relax URL regular expression (#10282)
+* [formula1] Relax URL regular expression (#10283)
+* [wat] Improve extraction (#10281)
+* [ctsnews] Fix extraction
+
+
+version 2016.08.10
+
+Core
+* Make --metadata-from-title non fatal when title does not match the pattern
+* Introduce options for randomized sleep before each download
+ --min-sleep-interval and --max-sleep-interval (#9930)
+* Respect default in _search_json_ld
+
+Extractors
++ [uol] Add extractor for uol.com.br (#4263)
+* [rbmaradio] Fix extraction and extract all formats (#10242)
++ [sonyliv] Add extractor for sonyliv.com (#10258)
+* [aparat] Fix extraction
+* [cwtv] Extract HTTP formats
++ [rozhlas] Add extractor for prehravac.rozhlas.cz (#10253)
+* [kuwo:singer] Fix extraction
+
+
+version 2016.08.07
+
+Core
++ Add support for TV Parental Guidelines ratings in parse_age_limit
++ Add decode_png (#9706)
++ Add support for partOfTVSeries in JSON-LD
+* Lower master M3U8 manifest preference for better format sorting
+
+Extractors
++ [discoverygo] Add extractor (#10245)
+* [flipagram] Make JSON-LD extraction non fatal
+* [generic] Make JSON-LD extraction non fatal
++ [bbc] Add support for morph embeds (#10239)
+* [tnaflixnetworkbase] Improve title extraction
+* [tnaflix] Fix metadata extraction (#10249)
+* [fox] Fix theplatform release URL query
+* [openload] Fix extraction (#9706)
+* [bbc] Skip duplicate manifest URLs
+* [bbc] Improve format code
++ [bbc] Add support for DASH and F4M
+* [bbc] Improve format sorting and listing
+* [bbc] Improve playlist extraction
++ [pokemon] Add extractor (#10093)
++ [condenast] Add fallback scenario for video info extraction
+
+
+version 2016.08.06
+
+Core
+* Add support for JSON-LD root list entries (#10203)
+* Improve unified_timestamp
+* Lower preference of RTSP formats in generic sorting
++ Add support for multiple properties in _og_search_property
+* Improve password hiding from verbose output
+
+Extractors
++ [adultswim] Add support for trailers (#10235)
+* [archiveorg] Improve extraction (#10219)
++ [jwplatform] Add support for playlists
++ [jwplatform] Add support for relative URLs
+* [jwplatform] Improve audio detection
++ [tvplay] Capture and output native error message
++ [tvplay] Extract series metadata
++ [tvplay] Add support for subtitles (#10194)
+* [tvp] Improve extraction (#7799)
+* [cbslocal] Fix timestamp parsing (#10213)
++ [naver] Add support for subtitles (#8096)
+* [naver] Improve extraction
+* [condenast] Improve extraction
+* [engadget] Relax URL regular expression
+* [5min] Fix extraction
++ [nationalgeographic] Add support for Episode Guide
++ [kaltura] Add support for subtitles
+* [kaltura] Optimize network requests
++ [vodplatform] Add extractor for vod-platform.net
+- [gamekings] Remove extractor
+* [limelight] Extract HTTP formats
+* [ntvru] Fix extraction
++ [comedycentral] Re-add :tds and :thedailyshow shortnames
+
+
+version 2016.08.01
+
+Fixed/improved extractors
+- [yandexmusic:track] Adapt to changes in track location JSON (#10193)
+- [bloomberg] Support another form of player (#10187)
+- [limelight] Skip DRM protected videos
+- [safari] Relax regular expressions for URL matching (#10202)
+- [cwtv] Add support for cwtvpr.com (#10196)
+
+
+version 2016.07.30
+
+Fixed/improved extractors
+- [twitch:clips] Sort formats
+- [tv2] Use m3u8_native
+- [tv2:article] Fix video detection (#10188)
+- rtve (#10076)
+- [dailymotion:playlist] Optimize download archive processing (#10180)
+
+
+version 2016.07.28
+
+Fixed/improved extractors
+- shared (#10170)
+- soundcloud (#10179)
+- twitch (#9767)
+
+
+version 2016.07.26.2
+
+Fixed/improved extractors
+- smotri
+- camdemy
+- mtv
+- comedycentral
+- cmt
+- cbc
+- mgtv
+- orf
+
+
+version 2016.07.24
+
+New extractors
+- arkena (#8682)
+- lcp (#8682)
+
+Fixed/improved extractors
+- facebook (#10151)
+- dailymail
+- telegraaf
+- dcn
+- onet
+- tvp
+
+Miscellaneous
+- Support $Time$ in DASH manifests
+
+
+version 2016.07.22
+
+New extractors
+- odatv (#9285)
+
+Fixed/improved extractors
+- bbc
+- youjizz (#10131)
+- youtube (#10140)
+- pornhub (#10138)
+- eporner (#10139)
+
+
+version 2016.07.17
+
+New extractors
+- nintendo (#9986)
+- streamable (#9122)
+
+Fixed/improved extractors
+- ard (#10095)
+- mtv
+- comedycentral (#10101)
+- viki (#10098)
+- spike (#10106)
+
+Miscellaneous
+- Improved twitter player detection (#10090)
+
+
+version 2016.07.16
+
+New extractors
+- ninenow (#5181)
+
+Fixed/improved extractors
+- rtve (#10076)
+- brightcove
+- 3qsdn
+- syfy (#9087, #3820, #2388)
+- youtube (#10083)
+
+Miscellaneous
+- Fix subtitle embedding for video-only and audio-only files (#10081)
+
+
+version 2016.07.13
+
+New extractors
+- rudo
+
+Fixed/improved extractors
+- biobiochiletv
+- tvplay
+- dbtv
+- brightcove
+- tmz
+- youtube (#10059)
+- shahid (#10062)
+- vk
+- ellentv (#10067)
+
+
+version 2016.07.11
+
+New extractors
+- roosterteeth (#9864)
+
+Fixed/improved extractors
+- miomio (#9605)
+- vuclip
+- youtube
+- vidzi (#10058)
+
+
+version 2016.07.09.2
+
+Fixed/improved extractors
+- vimeo (#1638)
+- facebook (#10048)
+- lynda (#10047)
+- animeondemand
+
+Fixed/improved features
+- Embedding subtitles no longer throws an error with problematic inputs (#9063)
+
+
+version 2016.07.09.1
+
+Fixed/improved extractors
+- youtube
+- ard
+- srmediathek (#9373)
+
+
+version 2016.07.09
+
+New extractors
+- Flipagram (#9898)
+
+Fixed/improved extractors
+- telecinco
+- toutv
+- radiocanada
+- tweakers (#9516)
+- lynda
+- nick (#7542)
+- polskieradio (#10028)
+- le
+- facebook (#9851)
+- mgtv
+- animeondemand (#10031)
+
+Fixed/improved features
+- `--postprocessor-args` and `--downloader-args` now accept non-ASCII inputs
+ on non-Windows systems
+
+
+version 2016.07.07
+
+New extractors
+- kamcord (#10001)
+
+Fixed/improved extractors
+- spiegel (#10018)
+- metacafe (#8539, #3253)
+- onet (#9950)
+- francetv (#9955)
+- brightcove (#9965)
+- daum (#9972)
+
+
+version 2016.07.06
+
+Fixed/improved extractors
+- youtube (#10007, #10009)
+- xuite
+- stitcher
+- spiegel
+- slideshare
+- sandia
+- rtvnh
+- prosiebensat1
+- onionstudios
+
+
+version 2016.07.05
+
+Fixed/improved extractors
+- brightcove
+- yahoo (#9995)
+- pornhub (#9997)
+- iqiyi
+- kaltura (#5557)
+- la7
+
+Changed features
+- Rename --cn-verification-proxy to --geo-verification-proxy
+
+Miscellaneous
+- Add script for displaying downloads statistics
+
+
+version 2016.07.03.1
+
+Fixed/improved extractors
+- theplatform
+- aenetworks
+- nationalgeographic
+- hrti (#9482)
+- facebook (#5701)
+- buzzfeed (#5701)
+- rai (#8617, #9157, #9232, #8552, #8551)
+- nationalgeographic (#9991)
+- iqiyi
+
+
+version 2016.07.03
+
+New extractors
+- hrti (#9482)
+
+Fixed/improved extractors
+- vk (#9981)
+- facebook (#9938)
+- xtube (#9953, #9961)
+
+
+version 2016.07.02
+
+New extractors
+- fusion (#9958)
+
+Fixed/improved extractors
+- twitch (#9975)
+- vine (#9970)
+- periscope (#9967)
+- pornhub (#8696)
+
+
+version 2016.07.01
+
+New extractors
+- 9c9media
+- ctvnews (#2156)
+- ctv (#4077)
+
+Fixed/improved extractors
+- rds
+- meta (#8789)
+- pornhub (#9964)
+- sixplay (#2183)
+
+New features
+- Accept quoted strings across multiple lines (#9940)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..0e259d4
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,121 @@
+Creative Commons Legal Code
+
+CC0 1.0 Universal
+
+ CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
+ LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
+ ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
+ INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
+ REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
+ PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
+ THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
+ HEREUNDER.
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator
+and subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for
+the purpose of contributing to a commons of creative, cultural and
+scientific works ("Commons") that the public can reliably and without fear
+of later claims of infringement build upon, modify, incorporate in other
+works, reuse and redistribute as freely as possible in any form whatsoever
+and for any purposes, including without limitation commercial purposes.
+These owners may contribute to the Commons to promote the ideal of a free
+culture and the further production of creative, cultural and scientific
+works, or to gain reputation or greater distribution for their Work in
+part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any
+expectation of additional consideration or compensation, the person
+associating CC0 with a Work (the "Affirmer"), to the extent that he or she
+is an owner of Copyright and Related Rights in the Work, voluntarily
+elects to apply CC0 to the Work and publicly distribute the Work under its
+terms, with knowledge of his or her Copyright and Related Rights in the
+Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not
+limited to, the following:
+
+ i. the right to reproduce, adapt, distribute, perform, display,
+ communicate, and translate a Work;
+ ii. moral rights retained by the original author(s) and/or performer(s);
+iii. publicity and privacy rights pertaining to a person's image or
+ likeness depicted in a Work;
+ iv. rights protecting against unfair competition in regards to a Work,
+ subject to the limitations in paragraph 4(a), below;
+ v. rights protecting the extraction, dissemination, use and reuse of data
+ in a Work;
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+ European Parliament and of the Council of 11 March 1996 on the legal
+ protection of databases, and under any national implementation
+ thereof, including any amended or successor version of such
+ directive); and
+vii. other similar, equivalent or corresponding rights throughout the
+ world based on applicable law or treaty, and any national
+ implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention
+of, applicable law, Affirmer hereby overtly, fully, permanently,
+irrevocably and unconditionally waives, abandons, and surrenders all of
+Affirmer's Copyright and Related Rights and associated claims and causes
+of action, whether now known or unknown (including existing as well as
+future claims and causes of action), in the Work (i) in all territories
+worldwide, (ii) for the maximum duration provided by applicable law or
+treaty (including future time extensions), (iii) in any current or future
+medium and for any number of copies, and (iv) for any purpose whatsoever,
+including without limitation commercial, advertising or promotional
+purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
+member of the public at large and to the detriment of Affirmer's heirs and
+successors, fully intending that such Waiver shall not be subject to
+revocation, rescission, cancellation, termination, or any other legal or
+equitable action to disrupt the quiet enjoyment of the Work by the public
+as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason
+be judged legally invalid or ineffective under applicable law, then the
+Waiver shall be preserved to the maximum extent permitted taking into
+account Affirmer's express Statement of Purpose. In addition, to the
+extent the Waiver is so judged Affirmer hereby grants to each affected
+person a royalty-free, non transferable, non sublicensable, non exclusive,
+irrevocable and unconditional license to exercise Affirmer's Copyright and
+Related Rights in the Work (i) in all territories worldwide, (ii) for the
+maximum duration provided by applicable law or treaty (including future
+time extensions), (iii) in any current or future medium and for any number
+of copies, and (iv) for any purpose whatsoever, including without
+limitation commercial, advertising or promotional purposes (the
+"License"). The License shall be deemed effective as of the date CC0 was
+applied by Affirmer to the Work. Should any part of the License for any
+reason be judged legally invalid or ineffective under applicable law, such
+partial invalidity or ineffectiveness shall not invalidate the remainder
+of the License, and in such case Affirmer hereby affirms that he or she
+will not (i) exercise any of his or her remaining Copyright and Related
+Rights in the Work or (ii) assert any associated claims and causes of
+action with respect to the Work, in either case contrary to Affirmer's
+express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+ surrendered, licensed or otherwise affected by this document.
+ b. Affirmer offers the Work as-is and makes no representations or
+ warranties of any kind concerning the Work, express, implied,
+ statutory or otherwise, including without limitation warranties of
+ title, merchantability, fitness for a particular purpose, non
+ infringement, or the absence of latent or other defects, accuracy, or
+ the present or absence of errors, whether or not discoverable, all to
+ the greatest extent permissible under applicable law.
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+ that may apply to the Work or any use thereof, including without
+ limitation any person's Copyright and Related Rights in the Work.
+ Further, Affirmer disclaims responsibility for obtaining any necessary
+ consents, permissions or other rights required for any use of the
+ Work.
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+ party to this document and has no duty or obligation with respect to
+ this CC0 or use of the Work.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..72879c5
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,9 @@
+include README.md
+include LICENSE
+include AUTHORS
+include ChangeLog
+include hypervideo.bash-completion
+include hypervideo.fish
+include hypervideo.1
+recursive-include docs Makefile conf.py *.rst
+recursive-include test *
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8608982
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,123 @@
+all: hypervideo README.md CONTRIBUTING.md README.txt hypervideo.1 hypervideo.bash-completion hypervideo.zsh hypervideo.fish
+
+clean:
+ rm -rf hypervideo.1.temp.md hypervideo.1 hypervideo.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ hypervideo.tar.gz hypervideo.zsh hypervideo.fish hypervideo_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp hypervideo hypervideo.exe
+ find . -name "*.pyc" -delete
+ find . -name "*.class" -delete
+
+PREFIX ?= /usr/local
+BINDIR ?= $(PREFIX)/bin
+MANDIR ?= $(PREFIX)/man
+SHAREDIR ?= $(PREFIX)/share
+PYTHON ?= /usr/bin/env python
+
+# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
+SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
+
+# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2
+MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi)
+
+install: hypervideo hypervideo.1 hypervideo.bash-completion hypervideo.zsh hypervideo.fish
+ install -d $(DESTDIR)$(BINDIR)
+ install -m 755 hypervideo $(DESTDIR)$(BINDIR)
+ install -d $(DESTDIR)$(MANDIR)/man1
+ install -m 644 hypervideo.1 $(DESTDIR)$(MANDIR)/man1
+ install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
+ install -m 644 hypervideo.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/hypervideo
+ install -d $(DESTDIR)$(SHAREDIR)/zsh/site-functions
+ install -m 644 hypervideo.zsh $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_hypervideo
+ install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions
+ install -m 644 hypervideo.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/hypervideo.fish
+
+codetest:
+ flake8 .
+
+test:
+ nosetests --verbose test
+ $(MAKE) codetest
+
+ot: offlinetest
+
+# Keep this list in sync with devscripts/run_tests.sh
+offlinetest: codetest
+ $(PYTHON) -m nose --verbose test \
+ --exclude test_age_restriction.py \
+ --exclude test_download.py \
+ --exclude test_socks.py \
+ --exclude test_subtitles.py \
+ --exclude test_write_annotations.py \
+ --exclude test_youtube_lists.py \
+ --exclude test_youtube_signature.py
+
+tar: hypervideo.tar.gz
+
+.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion ot offlinetest codetest
+
+pypi-files: hypervideo.bash-completion README.txt hypervideo.1 hypervideo.fish
+
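+# Build the standalone executable: bundle the package into a zip archive,
+# prepend a python shebang and mark the result executable.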
+hypervideo: hypervideo_dl/*.py hypervideo_dl/*/*.py
+ mkdir -p zip
+ for d in hypervideo_dl hypervideo_dl/downloader hypervideo_dl/extractor hypervideo_dl/postprocessor ; do \
+ mkdir -p zip/$$d ;\
+ cp -pPR $$d/*.py zip/$$d/ ;\
+ done
+ touch -t 200001010101 zip/hypervideo_dl/*.py zip/hypervideo_dl/*/*.py
+ mv zip/hypervideo_dl/__main__.py zip/
+ cd zip ; zip -q ../hypervideo hypervideo_dl/*.py hypervideo_dl/*/*.py __main__.py
+ rm -rf zip
+ echo '#!$(PYTHON)' > hypervideo
+ cat hypervideo.zip >> hypervideo
+ rm hypervideo.zip
+ chmod a+x hypervideo
+
+README.md: hypervideo_dl/*.py hypervideo_dl/*/*.py
+ COLUMNS=80 $(PYTHON) hypervideo_dl/__main__.py --help | $(PYTHON) devscripts/make_readme.py
+
+CONTRIBUTING.md: README.md
+ $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md
+
+README.txt: README.md
+ pandoc -f $(MARKDOWN) -t plain README.md -o README.txt
+
+hypervideo.1: README.md
+ $(PYTHON) devscripts/prepare_manpage.py hypervideo.1.temp.md
+ pandoc -s -f $(MARKDOWN) -t man hypervideo.1.temp.md -o hypervideo.1
+ rm -f hypervideo.1.temp.md
+
+hypervideo.bash-completion: hypervideo_dl/*.py hypervideo_dl/*/*.py devscripts/bash-completion.in
+ $(PYTHON) devscripts/bash-completion.py
+
+bash-completion: hypervideo.bash-completion
+
+hypervideo.zsh: hypervideo_dl/*.py hypervideo_dl/*/*.py devscripts/zsh-completion.in
+ $(PYTHON) devscripts/zsh-completion.py
+
+zsh-completion: hypervideo.zsh
+
+hypervideo.fish: hypervideo_dl/*.py hypervideo_dl/*/*.py devscripts/fish-completion.in
+ $(PYTHON) devscripts/fish-completion.py
+
+fish-completion: hypervideo.fish
+
+lazy-extractors: hypervideo_dl/extractor/lazy_extractors.py
+
+_EXTRACTOR_FILES = $(shell find hypervideo_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py')
+hypervideo_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
+ $(PYTHON) devscripts/make_lazy_extractors.py $@
+
+hypervideo.tar.gz: hypervideo README.md README.txt hypervideo.1 hypervideo.bash-completion hypervideo.zsh hypervideo.fish ChangeLog AUTHORS
+ @tar -czf hypervideo.tar.gz --transform "s|^|hypervideo/|" --owner 0 --group 0 \
+ --exclude '*.DS_Store' \
+ --exclude '*.kate-swp' \
+ --exclude '*.pyc' \
+ --exclude '*.pyo' \
+ --exclude '*~' \
+ --exclude '__pycache__' \
+ --exclude '.git' \
+ --exclude 'docs/_build' \
+ -- \
+ bin devscripts test hypervideo_dl docs \
+ ChangeLog AUTHORS LICENSE README.md README.txt \
+ Makefile MANIFEST.in hypervideo.1 hypervideo.bash-completion \
+ hypervideo.zsh hypervideo.fish setup.py setup.cfg \
+ hypervideo
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7e5b600
--- /dev/null
+++ b/README.md
@@ -0,0 +1,1447 @@
+hypervideo - A fork of youtube-dl without nonfree parts
+
+- [INSTALLATION](#installation)
+- [DESCRIPTION](#description)
+- [OPTIONS](#options)
+- [CONFIGURATION](#configuration)
+- [OUTPUT TEMPLATE](#output-template)
+- [FORMAT SELECTION](#format-selection)
+- [VIDEO SELECTION](#video-selection)
+- [FAQ](#faq)
+- [DEVELOPER INSTRUCTIONS](#developer-instructions)
+- [EMBEDDING HYPERVIDEO](#embedding-hypervideo)
+- [BUGS](#bugs)
+- [COPYRIGHT](#copyright)
+
+# INSTALLATION
+
+Hypervideo is a fork of [youtube-dl](https://ytdl-org.github.io/youtube-dl/) without nonfree parts, distributed for [Hyperbola GNU/Linux-libre](https://www.hyperbola.info/).
+
+To install on Hyperbola run:
+
+```console
+# pacman -S hypervideo
+```
+
+# DESCRIPTION
+**hypervideo** is a fork of youtube-dl without nonfree parts.
+
+ $ hypervideo [OPTIONS] URL [URL...]
+
+# OPTIONS
+ -h, --help Print this help text and exit
+ --version Print program version and exit
+ -i, --ignore-errors Continue on download errors, for
+ example to skip unavailable videos in a
+ playlist
+ --abort-on-error Abort downloading of further videos (in
+ the playlist or the command line) if an
+ error occurs
+ --dump-user-agent Display the current browser
+ identification
+ --list-extractors List all supported extractors
+ --extractor-descriptions Output descriptions of all supported
+ extractors
+ --force-generic-extractor Force extraction to use the generic
+ extractor
+ --default-search PREFIX Use this prefix for unqualified URLs.
+ For example "gvsearch2:" downloads two
+ videos from google videos for
+ hypervideo "large apple". Use the value
+ "auto" to let hypervideo guess
+ ("auto_warning" to emit a warning when
+ guessing). "error" just throws an
+ error. The default value "fixup_error"
+ repairs broken URLs, but emits an error
+ if this is not possible instead of
+ searching.
+ --ignore-config Do not read configuration files. When
+ given in the global configuration file
+ /etc/hypervideo.conf: Do not read the
+ user configuration in
+ ~/.config/hypervideo/config
+ (%APPDATA%/hypervideo/config.txt on
+ Windows)
+ --config-location PATH Location of the configuration file;
+ either the path to the config or its
+ containing directory.
+ --flat-playlist Do not extract the videos of a
+ playlist, only list them.
+ --mark-watched Mark videos watched (YouTube only)
+ --no-mark-watched Do not mark videos watched (YouTube
+ only)
+ --no-color Do not emit color codes in output
+
+## Network Options:
+ --proxy URL Use the specified HTTP/HTTPS/SOCKS
+ proxy. To enable SOCKS proxy, specify a
+ proper scheme. For example
+ socks5://127.0.0.1:1080/. Pass in an
+ empty string (--proxy "") for direct
+ connection
+ --socket-timeout SECONDS Time to wait before giving up, in
+ seconds
+ --source-address IP Client-side IP address to bind to
+ -4, --force-ipv4 Make all connections via IPv4
+ -6, --force-ipv6 Make all connections via IPv6
+
+## Geo Restriction:
+ --geo-verification-proxy URL Use this proxy to verify the IP address
+ for some geo-restricted sites. The
+ default proxy specified by --proxy (or
+ none, if the option is not present) is
+ used for the actual downloading.
+ --geo-bypass Bypass geographic restriction via
+ faking X-Forwarded-For HTTP header
+ --no-geo-bypass Do not bypass geographic restriction
+ via faking X-Forwarded-For HTTP header
+ --geo-bypass-country CODE Force bypass geographic restriction
+ with explicitly provided two-letter ISO
+ 3166-1 alpha-2 country code
+ --geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction
+ with explicitly provided IP block in
+ CIDR notation
+
+## Video Selection:
+ --playlist-start NUMBER Playlist video to start at (default is
+ 1)
+ --playlist-end NUMBER Playlist video to end at (default is
+ last)
+ --playlist-items ITEM_SPEC Playlist video items to download.
+ Specify indices of the videos in the
+ playlist separated by commas like: "--
+ playlist-items 1,2,5,8" if you want to
+ download videos indexed 1, 2, 5, 8 in
+ the playlist. You can specify range: "
+ --playlist-items 1-3,7,10-13", it will
+ download the videos at index 1, 2, 3,
+ 7, 10, 11, 12 and 13.
+ --match-title REGEX Download only matching titles (regex or
+ caseless sub-string)
+ --reject-title REGEX Skip download for matching titles
+ (regex or caseless sub-string)
+ --max-downloads NUMBER Abort after downloading NUMBER files
+ --min-filesize SIZE Do not download any videos smaller than
+ SIZE (e.g. 50k or 44.6m)
+ --max-filesize SIZE Do not download any videos larger than
+ SIZE (e.g. 50k or 44.6m)
+ --date DATE Download only videos uploaded in this
+ date
+ --datebefore DATE Download only videos uploaded on or
+ before this date (i.e. inclusive)
+ --dateafter DATE Download only videos uploaded on or
+ after this date (i.e. inclusive)
+ --min-views COUNT Do not download any videos with less
+ than COUNT views
+ --max-views COUNT Do not download any videos with more
+ than COUNT views
+ --match-filter FILTER Generic video filter. Specify any key
+ (see the "OUTPUT TEMPLATE" for a list
+ of available keys) to match if the key
+ is present, !key to check if the key is
+ not present, key > NUMBER (like
+ "comment_count > 12", also works with
+ >=, <, <=, !=, =) to compare against a
+ number, key = 'LITERAL' (like "uploader
+ = 'Mike Smith'", also works with !=) to
+ match against a string literal and & to
+ require multiple matches. Values which
+ are not known are excluded unless you
+ put a question mark (?) after the
+ operator. For example, to only match
+ videos that have been liked more than
+ 100 times and disliked less than 50
+ times (or the dislike functionality is
+ not available at the given service),
+ but who also have a description, use
+ --match-filter "like_count > 100 &
+ dislike_count <? 50 & description" .
+ --no-playlist Download only the video, if the URL
+ refers to a video and a playlist.
+ --yes-playlist Download the playlist, if the URL
+ refers to a video and a playlist.
+ --age-limit YEARS Download only videos suitable for the
+ given age
+ --download-archive FILE Download only videos not listed in the
+ archive file. Record the IDs of all
+ downloaded videos in it.
+ --include-ads Download advertisements as well
+ (experimental)
+
+## Download Options:
+ -r, --limit-rate RATE Maximum download rate in bytes per
+ second (e.g. 50K or 4.2M)
+ -R, --retries RETRIES Number of retries (default is 10), or
+ "infinite".
+ --fragment-retries RETRIES Number of retries for a fragment
+ (default is 10), or "infinite" (DASH,
+ hlsnative and ISM)
+ --skip-unavailable-fragments Skip unavailable fragments (DASH,
+ hlsnative and ISM)
+ --abort-on-unavailable-fragment Abort downloading when some fragment is
+ not available
+ --keep-fragments Keep downloaded fragments on disk after
+ downloading is finished; fragments are
+ erased by default
+ --buffer-size SIZE Size of download buffer (e.g. 1024 or
+ 16K) (default is 1024)
+ --no-resize-buffer Do not automatically adjust the buffer
+ size. By default, the buffer size is
+ automatically resized from an initial
+ value of SIZE.
+ --http-chunk-size SIZE Size of a chunk for chunk-based HTTP
+ downloading (e.g. 10485760 or 10M)
+ (default is disabled). May be useful
+ for bypassing bandwidth throttling
+ imposed by a webserver (experimental)
+ --playlist-reverse Download playlist videos in reverse
+ order
+ --playlist-random Download playlist videos in random
+ order
+ --xattr-set-filesize Set file xattribute ytdl.filesize with
+ expected file size
+ --hls-prefer-native Use the native HLS downloader instead
+ of ffmpeg
+ --hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
+ downloader
+ --hls-use-mpegts Use the mpegts container for HLS
+ videos, allowing to play the video
+ while downloading (some players may not
+ be able to play it)
+ --external-downloader COMMAND Use the specified external downloader.
+ Currently supports aria2c,avconv,axel,curl,
+ ffmpeg,httpie,wget
+ --external-downloader-args ARGS Give these arguments to the external
+ downloader
+
+## Filesystem Options:
+ -a, --batch-file FILE File containing URLs to download ('-'
+ for stdin), one URL per line. Lines
+ starting with '#', ';' or ']' are
+ considered as comments and ignored.
+ --id Use only video ID in file name
+ -o, --output TEMPLATE Output filename template, see the
+ "OUTPUT TEMPLATE" for all the info
+ --output-na-placeholder PLACEHOLDER Placeholder value for unavailable meta
+ fields in output filename template
+ (default is "NA")
+ --autonumber-start NUMBER Specify the start value for
+ %(autonumber)s (default is 1)
+ --restrict-filenames Restrict filenames to only ASCII
+ characters, and avoid "&" and spaces in
+ filenames
+ -w, --no-overwrites Do not overwrite files
+ -c, --continue Force resume of partially downloaded
+ files. By default, hypervideo will
+ resume downloads if possible.
+ --no-continue Do not resume partially downloaded
+ files (restart from beginning)
+ --no-part Do not use .part files - write directly
+ into output file
+ --no-mtime Do not use the Last-modified header to
+ set the file modification time
+ --write-description Write video description to a
+ .description file
+ --write-info-json Write video metadata to a .info.json
+ file
+ --write-annotations Write video annotations to a
+ .annotations.xml file
+ --load-info-json FILE JSON file containing the video
+ information (created with the "--write-
+ info-json" option)
+ --cookies FILE File to read cookies from and dump
+ cookie jar in
+ --cache-dir DIR Location in the filesystem where
+ hypervideo can store some downloaded
+ information permanently. By default
+ $XDG_CACHE_HOME/hypervideo or
+ ~/.cache/hypervideo . At the moment,
+ only YouTube player files (for videos
+ with obfuscated signatures) are cached,
+ but that may change.
+ --no-cache-dir Disable filesystem caching
+ --rm-cache-dir Delete all filesystem cache files
+
+## Thumbnail Options:
+ --write-thumbnail Write thumbnail image to disk
+ --write-all-thumbnails Write all thumbnail image formats to
+ disk
+ --list-thumbnails Simulate and list all available
+ thumbnail formats
+
+## Verbosity / Simulation Options:
+ -q, --quiet Activate quiet mode
+ --no-warnings Ignore warnings
+ -s, --simulate Do not download the video and do not
+ write anything to disk
+ --skip-download Do not download the video
+ -g, --get-url Simulate, quiet but print URL
+ -e, --get-title Simulate, quiet but print title
+ --get-id Simulate, quiet but print id
+ --get-thumbnail Simulate, quiet but print thumbnail URL
+ --get-description Simulate, quiet but print video
+ description
+ --get-duration Simulate, quiet but print video length
+ --get-filename Simulate, quiet but print output
+ filename
+ --get-format Simulate, quiet but print output format
+ -j, --dump-json Simulate, quiet but print JSON
+ information. See the "OUTPUT TEMPLATE"
+ for a description of available keys.
+ -J, --dump-single-json Simulate, quiet but print JSON
+ information for each command-line
+ argument. If the URL refers to a
+ playlist, dump the whole playlist
+ information in a single line.
+ --print-json Be quiet and print the video
+ information as JSON (video is still
+ being downloaded).
+ --newline Output progress bar as new lines
+ --no-progress Do not print progress bar
+ --console-title Display progress in console titlebar
+ -v, --verbose Print various debugging information
+ --dump-pages Print downloaded pages encoded using
+ base64 to debug problems (very verbose)
+ --write-pages Write downloaded intermediary pages to
+ files in the current directory to debug
+ problems
+ --print-traffic Display sent and read HTTP traffic
+ -C, --call-home Contact the hypervideo server for
+ debugging
+ --no-call-home Do NOT contact the hypervideo server
+ for debugging
+
+## Workarounds:
+ --encoding ENCODING Force the specified encoding
+ (experimental)
+ --no-check-certificate Suppress HTTPS certificate validation
+ --prefer-insecure Use an unencrypted connection to
+ retrieve information about the video.
+ (Currently supported only for YouTube)
+ --user-agent UA Specify a custom user agent
+ --referer URL Specify a custom referer, use if the
+ video access is restricted to one
+ domain
+ --add-header FIELD:VALUE Specify a custom HTTP header and its
+ value, separated by a colon ':'. You
+ can use this option multiple times
+ --bidi-workaround Work around terminals that lack
+ bidirectional text support. Requires
+ bidiv or fribidi executable in PATH
+ --sleep-interval SECONDS Number of seconds to sleep before each
+ download when used alone or a lower
+ bound of a range for randomized sleep
+ before each download (minimum possible
+ number of seconds to sleep) when used
+ along with --max-sleep-interval.
+ --max-sleep-interval SECONDS Upper bound of a range for randomized
+ sleep before each download (maximum
+ possible number of seconds to sleep).
+ Must only be used along with --min-
+ sleep-interval.
+
+## Video Format Options:
+ -f, --format FORMAT Video format code, see the "FORMAT
+ SELECTION" for all the info
+ --all-formats Download all available video formats
+ --prefer-free-formats Prefer free video formats unless a
+ specific one is requested
+ -F, --list-formats List all available formats of requested
+ videos
+ --youtube-skip-dash-manifest Do not download the DASH manifests and
+ related data on YouTube videos
+ --merge-output-format FORMAT If a merge is required (e.g.
+ bestvideo+bestaudio), output to given
+ container format. One of mkv, mp4, ogg,
+ webm, flv. Ignored if no merge is
+ required
+
+## Subtitle Options:
+ --write-sub Write subtitle file
+ --write-auto-sub Write automatically generated subtitle
+ file (YouTube only)
+ --all-subs Download all the available subtitles of
+ the video
+ --list-subs List all available subtitles for the
+ video
+ --sub-format FORMAT Subtitle format, accepts formats
+ preference, for example: "srt" or
+ "ass/srt/best"
+ --sub-lang LANGS Languages of the subtitles to download
+ (optional) separated by commas, use
+ --list-subs for available language tags
+
+## Authentication Options:
+ -u, --username USERNAME Login with this account ID
+ -p, --password PASSWORD Account password. If this option is
+ left out, hypervideo will ask
+ interactively.
+ -2, --twofactor TWOFACTOR Two-factor authentication code
+ -n, --netrc Use .netrc authentication data
+ --video-password PASSWORD Video password (vimeo, youku)
+
+## Adobe Pass Options:
+ --ap-mso MSO Adobe Pass multiple-system operator (TV
+ provider) identifier, use --ap-list-mso
+ for a list of available MSOs
+ --ap-username USERNAME Multiple-system operator account login
+ --ap-password PASSWORD Multiple-system operator account
+ password. If this option is left out,
+ hypervideo will ask interactively.
+ --ap-list-mso List all supported multiple-system
+ operators
+
+## Post-processing Options:
+ -x, --extract-audio Convert video files to audio-only files
+ (requires ffmpeg/avconv and
+ ffprobe/avprobe)
+ --audio-format FORMAT Specify audio format: "best", "aac",
+ "flac", "mp3", "m4a", "opus", "vorbis",
+ or "wav"; "best" by default; No effect
+ without -x
+ --audio-quality QUALITY Specify ffmpeg/avconv audio quality,
+ insert a value between 0 (better) and 9
+ (worse) for VBR or a specific bitrate
+ like 128K (default 5)
+ --recode-video FORMAT Encode the video to another format if
+ necessary (currently supported:
+ mp4|flv|ogg|webm|mkv|avi)
+ --postprocessor-args ARGS Give these arguments to the
+ postprocessor
+ -k, --keep-video Keep the video file on disk after the
+ post-processing; the video is erased by
+ default
+ --no-post-overwrites Do not overwrite post-processed files;
+ the post-processed files are
+ overwritten by default
+ --embed-subs Embed subtitles in the video (only for
+ mp4, webm and mkv videos)
+ --embed-thumbnail Embed thumbnail in the audio as cover
+ art
+ --add-metadata Write metadata to the video file
+ --metadata-from-title FORMAT Parse additional metadata like song
+ title / artist from the video title.
+ The format syntax is the same as
+ --output. Regular expression with named
+ capture groups may also be used. The
+ parsed parameters replace existing
+ values. Example: --metadata-from-title
+ "%(artist)s - %(title)s" matches a
+ title like "Coldplay - Paradise".
+ Example (regex): --metadata-from-title
+ "(?P<artist>.+?) - (?P<title>.+)"
+ --xattrs Write metadata to the video file's
+ xattrs (using dublin core and xdg
+ standards)
+ --fixup POLICY Automatically correct known faults of
+ the file. One of never (do nothing),
+ warn (only emit a warning),
+ detect_or_warn (the default; fix file
+ if we can, warn otherwise)
+ --prefer-avconv Prefer avconv over ffmpeg for running
+ the postprocessors
+ --prefer-ffmpeg Prefer ffmpeg over avconv for running
+ the postprocessors (default)
+ --ffmpeg-location PATH Location of the ffmpeg/avconv binary;
+ either the path to the binary or its
+ containing directory.
+ --exec CMD Execute a command on the file after
+ downloading and post-processing,
+ similar to find's -exec syntax.
+ Example: --exec 'adb push {}
+ /sdcard/Music/ && rm {}'
+ --convert-subs FORMAT Convert the subtitles to other format
+ (currently supported: srt|ass|vtt|lrc)
+
+# CONFIGURATION
+
+You can configure hypervideo by placing any supported command line option in a configuration file. On GNU+Linux and macOS, the system wide configuration file is located at `/etc/hypervideo.conf` and the user wide configuration file at `~/.config/hypervideo/config`. Note that the configuration file may not exist by default, so you may need to create it yourself.
+
+For example, with the following configuration file hypervideo will always extract the audio, not copy the mtime, use a proxy and save all videos under the `Movies` directory in your home directory:
+
+```
+# Lines starting with # are comments
+
+# Always extract audio
+-x
+
+# Do not copy the mtime
+--no-mtime
+
+# Use this proxy
+--proxy 127.0.0.1:3128
+
+# Save all videos under Movies directory in your home directory
+-o ~/Movies/%(title)s.%(ext)s
+```
+
+Note that the options in a configuration file are exactly the same options (switches) used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
+
+You can use `--ignore-config` if you want to disable the configuration file for a particular hypervideo run.
+
+You can also use `--config-location` if you want to use a custom configuration file for a particular hypervideo run.
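+
+For instance, a minimal sketch of both options (`~/hypervideo-music.conf` is an illustrative placeholder):
+
+```console
+# Ignore every configuration file for this run
+$ hypervideo --ignore-config 'https://www.youtube.com/watch?v=BaW_jenozKc'
+
+# Read options from a one-off configuration file instead
+$ hypervideo --config-location ~/hypervideo-music.conf 'https://www.youtube.com/watch?v=BaW_jenozKc'
+```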
+
+### Authentication with `.netrc` file
+
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every hypervideo execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
+```
+touch $HOME/.netrc
+chmod a-rwx,u+rw $HOME/.netrc
+```
+After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
+```
+machine <extractor> login <login> password <password>
+```
+For example:
+```
+machine youtube login myaccount@gmail.com password my_youtube_password
+machine twitch login my_twitch_account_name password my_twitch_password
+```
+To activate authentication with the `.netrc` file you should pass `--netrc` to hypervideo or place it in the [configuration file](#configuration).
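+
+For example, a one-off run that reads credentials from `.netrc` (the URL is a placeholder):
+
+```console
+$ hypervideo --netrc 'https://www.twitch.tv/videos/123456789'
+```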
+
+On Windows you may also need to set up the `%HOME%` environment variable manually. For example:
+```
+set HOME=%USERPROFILE%
+```
+
+# OUTPUT TEMPLATE
+
+The `-o` option allows users to indicate a template for the output file names.
+
+**tl;dr:** [navigate me to examples](#output-template-examples).
+
+The basic usage is not to set any template arguments when downloading a single file, like in `hypervideo -o funny_video.flv "https://some/video"`. However, the template may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are:
+
+ - `id` (string): Video identifier
+ - `title` (string): Video title
+ - `url` (string): Video URL
+ - `ext` (string): Video filename extension
+ - `alt_title` (string): A secondary title of the video
+ - `display_id` (string): An alternative identifier for the video
+ - `uploader` (string): Full name of the video uploader
+ - `license` (string): License name the video is licensed under
+ - `creator` (string): The creator of the video
+ - `release_date` (string): The date (YYYYMMDD) when the video was released
+ - `timestamp` (numeric): UNIX timestamp of the moment the video became available
+ - `upload_date` (string): Video upload date (YYYYMMDD)
+ - `uploader_id` (string): Nickname or id of the video uploader
+ - `channel` (string): Full name of the channel the video is uploaded on
+ - `channel_id` (string): Id of the channel
+ - `location` (string): Physical location where the video was filmed
+ - `duration` (numeric): Length of the video in seconds
+ - `view_count` (numeric): How many users have watched the video on the platform
+ - `like_count` (numeric): Number of positive ratings of the video
+ - `dislike_count` (numeric): Number of negative ratings of the video
+ - `repost_count` (numeric): Number of reposts of the video
+ - `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
+ - `comment_count` (numeric): Number of comments on the video
+ - `age_limit` (numeric): Age restriction for the video (years)
+ - `is_live` (boolean): Whether this video is a live stream or a fixed-length video
+ - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
+ - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
+ - `format` (string): A human-readable description of the format
+ - `format_id` (string): Format code specified by `--format`
+ - `format_note` (string): Additional info about the format
+ - `width` (numeric): Width of the video
+ - `height` (numeric): Height of the video
+ - `resolution` (string): Textual description of width and height
+ - `tbr` (numeric): Average bitrate of audio and video in KBit/s
+ - `abr` (numeric): Average audio bitrate in KBit/s
+ - `acodec` (string): Name of the audio codec in use
+ - `asr` (numeric): Audio sampling rate in Hertz
+ - `vbr` (numeric): Average video bitrate in KBit/s
+ - `fps` (numeric): Frame rate
+ - `vcodec` (string): Name of the video codec in use
+ - `container` (string): Name of the container format
+ - `filesize` (numeric): The number of bytes, if known in advance
+ - `filesize_approx` (numeric): An estimate for the number of bytes
+ - `protocol` (string): The protocol that will be used for the actual download
+ - `extractor` (string): Name of the extractor
+ - `extractor_key` (string): Key name of the extractor
+ - `epoch` (numeric): Unix epoch when creating the file
+ - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
+ - `playlist` (string): Name or id of the playlist that contains the video
+ - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
+ - `playlist_id` (string): Playlist identifier
+ - `playlist_title` (string): Playlist title
+ - `playlist_uploader` (string): Full name of the playlist uploader
+ - `playlist_uploader_id` (string): Nickname or id of the playlist uploader
+
+Available for the video that belongs to some logical chapter or section:
+
+ - `chapter` (string): Name or title of the chapter the video belongs to
+ - `chapter_number` (numeric): Number of the chapter the video belongs to
+ - `chapter_id` (string): Id of the chapter the video belongs to
+
+Available for the video that is an episode of some series or programme:
+
+ - `series` (string): Title of the series or programme the video episode belongs to
+ - `season` (string): Title of the season the video episode belongs to
+ - `season_number` (numeric): Number of the season the video episode belongs to
+ - `season_id` (string): Id of the season the video episode belongs to
+ - `episode` (string): Title of the video episode
+ - `episode_number` (numeric): Number of the video episode within a season
+ - `episode_id` (string): Id of the video episode
+
+Available for the media that is a track or a part of a music album:
+
+ - `track` (string): Title of the track
+ - `track_number` (numeric): Number of the track within an album or a disc
+ - `track_id` (string): Id of the track
+ - `artist` (string): Artist(s) of the track
+ - `genre` (string): Genre(s) of the track
+ - `album` (string): Title of the album the track belongs to
+ - `album_type` (string): Type of the album
+ - `album_artist` (string): List of all artists who appear on the album
+ - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
+ - `release_year` (numeric): Year (YYYY) when the album was released
+
+Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with the placeholder value provided with `--output-na-placeholder` (`NA` by default).
+
+For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `hypervideo test video` and id `BaW_jenozKcj`, this will result in a `hypervideo test video-BaW_jenozKcj.mp4` file created in the current directory.
+
+For numeric sequences you can use numeric related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
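+
+As a quick check, `--get-filename` prints the resulting name without downloading anything (a sketch; actual output varies with the video's metadata):
+
+```bash
+$ hypervideo --get-filename -o '%(view_count)05d-%(title)s.%(ext)s' BaW_jenozKc
+```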
+
+Output templates can also contain an arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'`, which will result in downloading each video into a directory corresponding to this path template. Any missing directory will be automatically created for you.
+
+To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
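+
+For instance, a percent sign that should survive into the file name has to be doubled (a sketch reusing the test video ID from above):
+
+```bash
+# Produces a file name starting with "100%-"
+$ hypervideo --get-filename -o '100%%-%(title)s.%(ext)s' BaW_jenozKc
+```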
+
+The current default template is `%(title)s-%(id)s.%(ext)s`.
+
+In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title (see the examples below).
+
+#### Output template and Windows batch files
+
+If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
+
+#### Output template examples
+
+Note that on Windows you may need to use double quotes instead of single.
+
+```bash
+$ hypervideo --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
+hypervideo test video ''_ä↭𝕐.mp4 # All kinds of weird characters
+
+$ hypervideo --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames
+hypervideo_test_video_.mp4 # A simple file name
+
+# Download YouTube playlist videos in separate directory indexed by video order in a playlist
+$ hypervideo -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
+
+# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
+$ hypervideo -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/user/TheLinuxFoundation/playlists
+
+# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
+$ hypervideo -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
+
+# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
+$ hypervideo -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617
+
+# Stream the video being downloaded to stdout
+$ hypervideo -o - BaW_jenozKc
+```
+
+# FORMAT SELECTION
+
+By default hypervideo tries to download the best available quality, i.e. if you want the best quality you **don't need** to pass any special options, hypervideo will guess it for you by **default**.
+
+But sometimes you may want to download in a different format, for example when you are on a slow or intermittent connection. The key mechanism for achieving this is so-called *format selection*, based on which you can explicitly specify the desired format, select formats based on some criterion or criteria, set up precedence and much more.
+
+The general syntax for format selection is `--format FORMAT` or shorter `-f FORMAT` where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
+
+**tl;dr:** [navigate me to examples](#format-selection-examples).
+
+The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for a particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
+
+You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
+
+You can also use special names to select particular edge case formats:
+
+ - `best`: Select the best quality format represented by a single file with video and audio.
+ - `worst`: Select the worst quality format represented by a single file with video and audio.
+ - `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available.
+ - `worstvideo`: Select the worst quality video-only format. May not be available.
+ - `bestaudio`: Select the best quality audio-only format. May not be available.
+ - `worstaudio`: Select the worst quality audio-only format. May not be available.
+
+For example, to download the worst quality video-only format you can use `-f worstvideo`.
+
+If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that slash is left-associative, i.e. formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
+
+If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
+
+You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
+
+The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):
+
+ - `filesize`: The number of bytes, if known in advance
+ - `width`: Width of the video, if known
+ - `height`: Height of the video, if known
+ - `tbr`: Average bitrate of audio and video in KBit/s
+ - `abr`: Average audio bitrate in KBit/s
+ - `vbr`: Average video bitrate in KBit/s
+ - `asr`: Audio sampling rate in Hertz
+ - `fps`: Frame rate
+
+Filtering also works for the comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and the following string meta fields:
+
+ - `ext`: File extension
+ - `acodec`: Name of the audio codec in use
+ - `vcodec`: Name of the video codec in use
+ - `container`: Name of the container format
+ - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
+ - `format_id`: A short description of the format
+ - `language`: Language code
+
+Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
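+
+For instance, a sketch that excludes RTMP-family protocols (the URL is a placeholder):
+
+```bash
+$ hypervideo -f 'best[protocol!*=rtmp]' 'https://example.com/video'
+```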
+
+Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by a particular extractor, i.e. the metadata offered by the video hoster.
+
+Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
+
+You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg/avconv.
+
+Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
+
+Since the end of April 2015 and version 2015.04.26, hypervideo uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/ytdl-org/youtube-dl/issues/5447), [#5456](https://github.com/ytdl-org/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you only want to download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use hypervideo to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify the output template as `-o -`, hypervideo still uses `-f best` format selection in order to start content delivery immediately to your player and not wait until `bestvideo` and `bestaudio` are downloaded and muxed.
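+
+Expressed as a configuration file line, that 1080p cap looks like this:
+
+```
+# Cap video height at 1080p; fall back to the best single file
+-f bestvideo[height<=?1080]+bestaudio/best
+```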
+
+If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run hypervideo.
+
+#### Format selection examples
+
+Note that on Windows you may need to use double quotes instead of single.
+
+```bash
+# Download best mp4 format available or any other best if no mp4 available
+$ hypervideo -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
+
+# Download best format available but no better than 480p
+$ hypervideo -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'
+
+# Download best format available but no bigger than 50 MB
+$ hypervideo -f 'best[filesize<50M]'
+
+# Download best format available via direct link over HTTP/HTTPS protocol
+$ hypervideo -f '(bestvideo+bestaudio/best)[protocol^=http]'
+
+# Download the best video format and the best audio format without merging them
+$ hypervideo -f 'bestvideo,bestaudio' -o '%(title)s.f%(format_id)s.%(ext)s'
+```
+Note that in the last example, an output template is recommended, as `bestvideo` and `bestaudio` may otherwise end up with the same file name.
+
+
+# VIDEO SELECTION
+
+Videos can be filtered by their upload date using the options `--date`, `--datebefore` or `--dateafter`. They accept dates in two formats:
+
+ - Absolute dates: Dates in the format `YYYYMMDD`.
+ - Relative dates: Dates in the format `(now|today)[+-][0-9](day|week|month|year)(s)?`
+
+Examples:
+
+```bash
+# Download only the videos uploaded in the last 6 months
+$ hypervideo --dateafter now-6months
+
+# Download only the videos uploaded on January 1, 1970
+$ hypervideo --date 19700101
+
+# Download only the videos uploaded in the 200x decade
+$ hypervideo --dateafter 20000101 --datebefore 20091231
+```
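+
+The same date filtering is also available when embedding hypervideo from Python (see [EMBEDDING HYPERVIDEO](#embedding-hypervideo)) through the `daterange` option, which takes a `DateRange` helper from `hypervideo_dl.utils`. A minimal sketch; the playlist URL is only a placeholder:
+
+```python
+import hypervideo_dl
+from hypervideo_dl.utils import DateRange
+
+# Download only the videos uploaded in the 200x decade
+ydl_opts = {'daterange': DateRange('20000101', '20091231')}
+with hypervideo_dl.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re'])
+```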
+
+# FAQ
+
+### How do I update hypervideo?
+
+If you installed hypervideo with pacman, a simple `doas pacman -Syu hypervideo` is sufficient to update.
+
+As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. To do that, remove the distribution's package with a command like
+
+```console
+$ doas pacman -Rs hypervideo
+```
+
+### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
+
+YouTube changed their playlist format in March 2014 and later on, so you'll need an up-to-date version of hypervideo to download all YouTube videos.
+
+### I'm getting an error when trying to use output template: `error: using output template conflicts with using title, video ID or auto number`
+
+Make sure you are not using `-o` together with any of the options `-t`, `--title`, `--id`, `-A` or `--auto-number`, whether on the command line or in a configuration file. Remove the latter if present.
+
+### Do I always have to pass `-citw`?
+
+By default, hypervideo intends to use the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`.
+
+### Can you please put the `-b` option back?
+
+Most people asking this question are not aware that hypervideo now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and hypervideo will try to download it.
+
+### I get HTTP error 402 when trying to download a video. What's this?
+
+Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/ytdl-org/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the YouTube URL, solving the CAPTCHA, and restarting hypervideo.
+
+### Do I need any other programs?
+
+hypervideo works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. hypervideo will detect whether avconv/ffmpeg is present and automatically pick the best option.
+
+Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](https://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
+
+### I have downloaded a video but how can I play it?
+
+Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](https://www.videolan.org/) or [mplayer](https://www.mplayerhq.hu/).
+
+### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser.
+
+It depends a lot on the service. In many cases, requests for the video (to download or play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used; use `--dump-user-agent` to see the one in use by hypervideo. You can also get the necessary cookies and HTTP headers from the JSON output obtained with `--dump-json`.
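+
+As a rough sketch of the `--dump-json` approach from Python (note that the top-level `url` key is only present when a single format is selected; for merged formats, inspect the `formats` list instead):
+
+```python
+import json
+import subprocess
+
+# Ask hypervideo for the full info dict as JSON, without downloading
+out = subprocess.check_output(
+    ['hypervideo', '--dump-json', '-f', 'best',
+     'https://www.youtube.com/watch?v=BaW_jenozKc'])
+info = json.loads(out.decode('utf-8'))
+
+print(info['url'])               # direct media URL
+print(info.get('http_headers'))  # headers to send when requesting it
+```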
+
+It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule.
+
+Please bear in mind that some URL protocols are **not** supported by browsers out of the box, including RTMP. If you are using `-g`, your own downloader must support these as well.
+
+If you want to play the video on a machine that is not running hypervideo, you can relay the video content from the machine that runs hypervideo. You can use `-o -` to let hypervideo stream a video to stdout, or simply allow the player to download the files written by hypervideo in turn.
+
+### ERROR: no fmt_url_map or conn information found in video info
+
+YouTube has switched to a new video info format in July 2011 which is not supported by old versions of hypervideo. See [above](#how-do-i-update-hypervideo) for how to update hypervideo.
+
+### ERROR: unable to download video
+
+YouTube requires an additional signature since September 2012 which is not supported by old versions of hypervideo. See [above](#how-do-i-update-hypervideo) for how to update hypervideo.
+
+### Video URL contains an ampersand and I'm getting some strange output `[1] 2839` or `'v' is not recognized as an internal or external command`
+
+That's actually the output from your shell. Since the ampersand is one of the special shell characters, it's interpreted by the shell, preventing you from passing the whole URL to hypervideo. To stop your shell from interpreting the ampersands (or any other special characters), either put the whole URL in quotes or escape them with a backslash (which approach works depends on your shell).
+
+For example, if your URL is https://www.youtube.com/watch?t=4&v=BaW_jenozKc, you should end up with the following command:
+
+```console
+$ hypervideo 'https://www.youtube.com/watch?t=4&v=BaW_jenozKc'
+```
+
+or
+
+```console
+$ hypervideo https://www.youtube.com/watch?t=4\&v=BaW_jenozKc
+```
+
+### ExtractorError: Could not find JS function u'OF'
+
+In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of hypervideo. See [above](#how-do-i-update-hypervideo) for how to update hypervideo.
+
+### HTTP Error 429: Too Many Requests or 402: Payment Required
+
+These two error codes indicate that the service is blocking your IP address because of overuse. Usually this is a soft block, meaning that you can regain access after solving a CAPTCHA. Just open a browser, solve the CAPTCHA the service presents, and then [pass the cookies](#how-do-i-pass-cookies-to-hypervideo) to hypervideo. Note that if your machine has multiple external IPs, you should also pass exactly the same IP you used for solving the CAPTCHA with [`--source-address`](#network-options). You may also need to pass the `User-Agent` HTTP header of your browser with [`--user-agent`](#workarounds).
+
+If this is not the case (the service does not suggest a CAPTCHA to solve), you can contact the service and ask them to unblock your IP address, or, if you have already acquired a whitelisted IP address, use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
+
+### SyntaxError: Non-ASCII character
+
+The error
+
+ File "hypervideo", line 2
+ SyntaxError: Non-ASCII character '\x93' ...
+
+means you're using an outdated version of Python. Please update to Python 2.6 or 2.7.
+
+### What is this binary file? Where has the code gone?
+
+Since June 2012 ([#342](https://github.com/ytdl-org/youtube-dl/issues/342)) hypervideo is packed as an executable zipfile, simply unzip it (might need renaming to `hypervideo.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make hypervideo`.
+
+### The exe throws an error due to missing `MSVCR100.dll`
+
+To run the exe you need to install first the [Microsoft Visual C++ 2010 Service Pack 1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe).
+
+### On Windows, how should I set up ffmpeg and hypervideo? Where should I put the exe files?
+
+If you put hypervideo and ffmpeg in the same directory that you're running the command from, it will work, but that's rather cumbersome.
+
+To make a different directory work - either for ffmpeg, or for hypervideo, or for both - simply create the directory (say, `C:\bin`, or `C:\Users\<User name>\bin`), put all the executables directly in there, and then [set your PATH environment variable](https://www.java.com/en/download/help/path.xml) to include that directory.
+
+From then on, after restarting your shell, you will be able to access both hypervideo and ffmpeg (and hypervideo will be able to find ffmpeg) by simply typing `hypervideo` or `ffmpeg`, no matter what directory you're in.
+
+### How do I put downloads into a specific folder?
+
+Use the `-o` option to specify an [output template](#output-template), for example `-o "/home/user/videos/%(title)s-%(id)s.%(ext)s"`. If you want this for all of your downloads, put the option into your [configuration file](#configuration).
+
+### How do I download a video starting with a `-`?
+
+Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
+
+ $ hypervideo -- -wNyEUrxzFU
+ $ hypervideo "https://www.youtube.com/watch?v=-wNyEUrxzFU"
+
+### How do I pass cookies to hypervideo?
+
+Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
+
+In order to extract cookies from your browser, use any conforming browser extension for exporting cookies. For example, [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
+
+Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
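+
+If you need to fix the newlines, any text tool will do. As a small illustrative sketch in Python (the file name is just an example):
+
+```python
+# Rewrite cookies.txt with Unix (LF) newlines
+with open('cookies.txt', 'rb') as f:
+    data = f.read()
+with open('cookies.txt', 'wb') as f:
+    f.write(data.replace(b'\r\n', b'\n'))
+```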
+
+Passing cookies to hypervideo is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around a [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) that some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
+
+### How do I stream directly to media player?
+
+You will first need to tell hypervideo to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming), and then pipe the former to the latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with:
+
+    $ hypervideo -o - "https://www.youtube.com/watch?v=BaW_jenozKc" | vlc -
+
+### How do I download only new videos from a playlist?
+
+Use the download-archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt`, which will record the identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file.
+
+For example, at first,
+
+ $ hypervideo --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
+
+will download the complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create a file `archive.txt`. Each subsequent run will only download new videos if any:
+
+ $ hypervideo --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
+
+### Should I add `--hls-prefer-native` into my config?
+
+When hypervideo detects an HLS video, it can download it either with the built-in downloader or ffmpeg. Since many HLS streams are slightly invalid and ffmpeg/hypervideo each handle some invalid cases better than the other, there is an option to switch the downloader if needed.
+
+When hypervideo knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, hypervideo will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of hypervideo, with improvements of the built-in downloader and/or ffmpeg.
+
+In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://ytdl-org.github.io/youtube-dl/supportedsites.html)) cannot mandate one specific downloader.
+
+If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
+
+### Can you add support for this anime video site, or site which shows current movies for free?
+
+As a matter of policy (as well as legality), hypervideo does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion in hypervideo.
+
+A note on the service's site saying that it does not host the infringing content, but merely links to those who do, is evidence that the service should **not** be included in hypervideo. The same goes for any DMCA note when the whole front page of the service is filled with videos they are not allowed to distribute. A "fair use" note is equally unconvincing if the service shows copyright-protected videos in full without authorization.
+
+Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
+
+### How can I speed up work on my issue?
+
+(Also known as: Help, my important issue is not being solved!) The hypervideo core developer team is quite small. While we do our best to solve as many issues as possible, sometimes that can take quite a while. To speed up your issue, here's what you can do:
+
+First of all, please do report the issue [at our issue tracker](https://yt-dl.org/bugs). That allows us to coordinate all efforts by users and developers, and serves as a unified point. Unfortunately, the hypervideo project has grown too large to use personal email as an effective communication channel.
+
+Please read the [bug reporting instructions](#bugs) below. A lot of bugs lack all the necessary information. If you can, offer proxy, VPN, or shell access to the hypervideo developers. If you are able to, test the issue from multiple computers in multiple countries to exclude local censorship or misconfiguration issues.
+
+If nobody is interested in solving your issue, you are welcome to take matters into your own hands and submit a pull request (or coerce/pay somebody else to do so).
+
+Feel free to bump the issue from time to time by writing a small comment ("Issue is still present in hypervideo version ...from France, but fixed from Belgium"), but please not more than once a month. Please do not declare your issue as `important` or `urgent`.
+
+### How can I detect whether a given URL is supported by hypervideo?
+
+For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from https://example.com/video/1234567 to https://example.com/v/1234567) and hypervideo then reports a URL of a service in that list as unsupported. In that case, simply report a bug.
+
+It is *not* possible to detect whether a URL is supported or not. That's because hypervideo contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but it not only allows users to extract videos from lots of websites that embed a video from another service, it may also be used to extract video from a service that hosts the video itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
+
+If you want to find out whether a given URL is supported, simply call hypervideo with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run hypervideo on the console) or catching an `UnsupportedError` exception if you run it from a Python program.
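+
+As a rough sketch of the latter approach (the exception classes live in `hypervideo_dl.utils`; depending on your options, the `UnsupportedError` may arrive wrapped in a `DownloadError`):
+
+```python
+import hypervideo_dl
+from hypervideo_dl.utils import DownloadError, UnsupportedError
+
+
+def is_probably_supported(url):
+    with hypervideo_dl.YoutubeDL({'quiet': True}) as ydl:
+        try:
+            # Probe the URL without downloading anything
+            ydl.extract_info(url, download=False)
+        except (UnsupportedError, DownloadError):
+            return False
+    return True
+```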
+
+# Why do I need to go through that much red tape when filing bugs?
+
+Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless: for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in hypervideo but in general shell usage), because the problem had already been reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download, and many more simple, easy-to-avoid problems, many of which were totally unrelated to hypervideo.
+
+hypervideo is a free software project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident to be able to reproduce the issue without asking the reporter repeatedly. As such, the output of `hypervideo -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of hypervideo is current.
+
+# DEVELOPER INSTRUCTIONS
+
+Most users do not need to build hypervideo and can [download the builds](https://git.conocimientoslibres.ga/software/hypervideo.git) or get them from their distribution.
+
+To run hypervideo as a developer, you don't need to build anything either. Simply execute
+
+ $ python -m hypervideo_dl
+
+To run the tests, simply invoke your favorite test runner, or execute a test file directly; any of the following work:
+
+ $ python -m unittest discover
+ $ python test/test_download.py
+ $ nosetests
+
+See item 6 of the [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor-specific test cases.
+
+If you want to create a build of hypervideo yourself, you'll need
+
+* python
+* make (only GNU make is supported)
+* pandoc
+* zip
+* nosetests
+
+### Adding support for a new site
+
+If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. hypervideo does **not support** such sites thus pull requests adding support for them **will be rejected**.
+
+After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
+
+1. [Fork this repository](https://git.conocimientoslibres.ga/software/hypervideo.git)
+2. Check out the source code with:
+
+ $ git clone https://git.conocimientoslibres.ga/software/hypervideo.git
+
+3. Start a new git branch with
+
+ $ cd hypervideo
+ $ git checkout -b yourextractor
+
+4. Start with this simple template and save it to `hypervideo_dl/extractor/yourextractor.py`:
+
+ ```python
+ # coding: utf-8
+ from __future__ import unicode_literals
+
+ from .common import InfoExtractor
+
+
+ class YourExtractorIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://yourextractor.com/watch/42',
+ 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
+ 'info_dict': {
+ 'id': '42',
+ 'ext': 'mp4',
+ 'title': 'Video title goes here',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ # TODO more properties, either as:
+ # * A value
+ # * MD5 checksum; start the string with md5:
+ # * A regular expression; start the string with re:
+ # * Any Python type (for example int or float)
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ # TODO more code goes here, for example ...
+ title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
+ # TODO more properties (see hypervideo_dl/extractor/common.py)
+ }
+ ```
+5. Add an import in [`hypervideo_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/extractor/extractors.py).
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not counted.
+7. Have a look at [`hypervideo_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/hypervideo_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
+8. Make sure your code follows [hypervideo coding conventions](#hypervideo-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
+
+ $ flake8 hypervideo_dl/extractor/yourextractor.py
+
+9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by hypervideo, namely 2.6, 2.7, and 3.2+.
+10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
+
+ $ git add hypervideo_dl/extractor/extractors.py
+ $ git add hypervideo_dl/extractor/yourextractor.py
+ $ git commit -m '[yourextractor] Add new extractor'
+ $ git push origin yourextractor
+
+11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
+
+In any case, thank you very much for your contributions!
+
+## hypervideo coding conventions
+
+This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
+
+Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control, and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly, but also to minimize dependency on the source's layout and even to make the code anticipate potential future changes and be ready for them. This is important because it will allow the extractor not to break on minor layout changes, thus keeping old hypervideo versions working. Even though this breakage issue is easily fixed by releasing a new version of hypervideo with the fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non-rolling-release distros may never receive an update at all.
+
+### Mandatory and optional metafields
+
+For extraction to work, hypervideo relies on the metadata your extractor extracts and provides to hypervideo, expressed as an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/hypervideo_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by hypervideo:
+
+ - `id` (media identifier)
+ - `title` (media title)
+ - `url` (media download URL) or `formats`
+
+In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention hypervideo also treats `id` and `title` as mandatory. Thus the aforementioned meta fields are the critical data without which extraction does not make sense; if any of them fails to be extracted, the extractor is considered completely broken.
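+
+For illustration, a minimal info dict satisfying these requirements would look like the following sketch, where `video_id`, `title` and `video_url` stand for whatever your extractor obtained:
+
+```python
+return {
+    'id': video_id,    # mandatory
+    'title': title,    # mandatory
+    'url': video_url,  # mandatory, unless you provide 'formats' instead
+}
+```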
+
+[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/hypervideo_dl/extractor/common.py#L188-L303) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** of situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of the general purpose mandatory fields.
+
+#### Example
+
+Say you have some source dictionary `meta` that you've fetched as JSON with an HTTP request and it has a key `summary`:
+
+```python
+meta = self._download_json(url, video_id)
+```
+
+Assume at this point `meta`'s layout is:
+
+```python
+{
+ ...
+ "summary": "some fancy summary text",
+ ...
+}
+```
+
+Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field, you should be prepared for this key to be missing from the `meta` dict, so you should extract it like:
+
+```python
+description = meta.get('summary') # correct
+```
+
+and not like:
+
+```python
+description = meta['summary'] # incorrect
+```
+
+The latter will break the extraction process with a `KeyError` if `summary` disappears from `meta` at some later time, while with the former approach extraction will just go ahead with `description` set to `None`, which is perfectly fine (remember `None` is equivalent to the absence of data).
+
+Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
+
+```python
+description = self._search_regex(
+ r'<span[^>]+id="title"[^>]*>([^<]+)<',
+ webpage, 'description', fatal=False)
+```
+
+With `fatal` set to `False`, if `_search_regex` fails to extract `description`, it will emit a warning and continue extraction.
+
+You can also pass `default=<some fallback value>`, for example:
+
+```python
+description = self._search_regex(
+ r'<span[^>]+id="title"[^>]*>([^<]+)<',
+ webpage, 'description', default=None)
+```
+
+On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
+
+### Provide fallbacks
+
+When extracting metadata, try to do so from multiple sources. For example, if `title` is present in several places, try extracting it from at least some of them. This makes the extractor more future-proof in case some of the sources become unavailable.
+
+#### Example
+
+Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like:
+
+```python
+title = meta['title']
+```
+
+If `title` disappears from `meta` in the future due to some changes on the hoster's side, the extraction would fail, since `title` is mandatory. That's expected.
+
+Assume that you have another source you can extract `title` from, for example the `og:title` HTML meta tag of a `webpage`. In this case you can provide a fallback scenario:
+
+```python
+title = meta.get('title') or self._og_search_title(webpage)
+```
+
+This code will try to extract `title` from `meta` first, and if that fails it will try extracting `og:title` from the webpage.
+
+### Regular expressions
+
+#### Don't capture groups you don't use
+
+A capturing group must indicate that it is used somewhere in the code. Any group that is not used must be made non-capturing.
+
+##### Example
+
+Don't capture the id attribute name here, since you can't use it for anything anyway.
+
+Correct:
+
+```python
+r'(?:id|ID)=(?P<id>\d+)'
+```
+
+Incorrect:
+
+```python
+r'(id|ID)=(?P<id>\d+)'
+```
+
+
+#### Make regular expressions relaxed and flexible
+
+When using regular expressions, try to write them fuzzy, relaxed and flexible: skip insignificant parts that are more likely to change, allow both single and double quotes for quoted values, and so on.
+
+##### Example
+
+Say you need to extract `title` from the following HTML code:
+
+```html
+<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
+```
+
+The code for that task should look similar to:
+
+```python
+title = self._search_regex(
+ r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
+```
+
+Or even better:
+
+```python
+title = self._search_regex(
+ r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
+ webpage, 'title', group='title')
+```
+
+Note how this tolerates potential changes in the `style` attribute's value, or a switch from double to single quotes for the `class` attribute.
+
+The code definitely should not look like:
+
+```python
+title = self._search_regex(
+ r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
+ webpage, 'title', group='title')
+```
+
+### Long lines policy
+
+There is a soft limit to keep lines of code under 80 characters long. This limit should be respected when possible and when it does not make readability and code maintenance worse.
+
+For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
+
+Correct:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+Incorrect:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list='
+'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+### Inline values
+
+Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
+
+#### Example
+
+Correct:
+
+```python
+title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+```
+
+Incorrect:
+
+```python
+TITLE_RE = r'<title>([^<]+)</title>'
+# ...some lines of code...
+title = self._html_search_regex(TITLE_RE, webpage, 'title')
+```
+
+### Collapse fallbacks
+
+Multiple fallback values can quickly become unwieldy; collapse them into a single expression via a list of patterns.
+
+#### Example
+
+Good:
+
+```python
+description = self._html_search_meta(
+ ['og:description', 'description', 'twitter:description'],
+ webpage, 'description', default=None)
+```
+
+Unwieldy:
+
+```python
+description = (
+ self._og_search_description(webpage, default=None)
+ or self._html_search_meta('description', webpage, default=None)
+ or self._html_search_meta('twitter:description', webpage, default=None))
+```
+
+Methods supporting a list of patterns are `_search_regex`, `_html_search_regex`, `_og_search_property` and `_html_search_meta`, as in the sketch below.
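+
+For example, `_search_regex` accepts a tuple of patterns that are tried in order (a sketch; the patterns are illustrative):
+
+```python
+title = self._search_regex(
+    (r'<h1[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
+     r'<title>(?P<title>[^<]+)</title>'),
+    webpage, 'title', group='title')
+```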
+
+### Trailing parentheses
+
+Always move trailing parentheses after the last argument.
+
+#### Example
+
+Correct:
+
+```python
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list)
+```
+
+Incorrect:
+
+```python
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list,
+)
+```
+
+### Use convenience conversion and parsing functions
+
+Wrap all extracted numeric data into safe functions from [`hypervideo_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+
+Use `url_or_none` for safe URL processing.
+
+Use `try_get` for safe metadata extraction from parsed JSON.
+
+Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta field extraction, `parse_resolution` for resolution extraction, `parse_duration` for `duration` extraction, and `parse_age_limit` for `age_limit` extraction.
+
+Explore [`hypervideo_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/utils.py) for more useful convenience functions.
+
+#### More examples
+
+##### Safely extract optional description from parsed JSON
+```python
+description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
+```
+
+##### Safely extract more optional metadata
+```python
+video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+description = video.get('summary')
+duration = float_or_none(video.get('durationMs'), scale=1000)
+view_count = int_or_none(video.get('views'))
+```
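+
+##### Safely parse dates, durations and counts
+
+A hedged sketch using the conversion helpers named above; the key names in `video` are illustrative:
+
+```python
+upload_date = unified_strdate(video.get('publishedAt'))  # e.g. '2021-06-09T17:54:27' -> '20210609'
+timestamp = unified_timestamp(video.get('publishedAt'))
+duration = parse_duration(video.get('duration'))         # e.g. '1:30' -> 90.0
+like_count = parse_count(video.get('likes'))             # e.g. '1.2M' -> 1200000
+thumbnail = url_or_none(video.get('thumbnailUrl'))
+```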
+
+# EMBEDDING HYPERVIDEO
+
+hypervideo makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).
+
+From a Python program, you can embed hypervideo in a more powerful fashion, like this:
+
+```python
+from __future__ import unicode_literals
+import hypervideo_dl
+
+ydl_opts = {}
+with hypervideo_dl.YoutubeDL(ydl_opts) as ydl:
+ ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
+```
+
+Most likely, you'll want to use various options. For a list of options available, have a look at [`hypervideo_dl/YoutubeDL.py`](https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/hypervideo_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept hypervideo's output, set a `logger` object.
+
+Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
+
+```python
+from __future__ import unicode_literals
+import hypervideo_dl
+
+
+class MyLogger(object):
+ def debug(self, msg):
+ pass
+
+ def warning(self, msg):
+ pass
+
+ def error(self, msg):
+ print(msg)
+
+
+def my_hook(d):
+ if d['status'] == 'finished':
+ print('Done downloading, now converting ...')
+
+
+ydl_opts = {
+ 'format': 'bestaudio/best',
+ 'postprocessors': [{
+ 'key': 'FFmpegExtractAudio',
+ 'preferredcodec': 'mp3',
+ 'preferredquality': '192',
+ }],
+ 'logger': MyLogger(),
+ 'progress_hooks': [my_hook],
+}
+with hypervideo_dl.YoutubeDL(ydl_opts) as ydl:
+ ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
+```
+
+# BUGS
+
+Bugs and suggestions should be reported at: <https://github.com/ytdl-org/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+
+**Please include the full output of hypervideo when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
+
+```
+$ hypervideo -v <your command line>
+[debug] System config: []
+[debug] User config: []
+[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
+[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
+[debug] hypervideo version 1.1.11
+[debug] Git HEAD: 135392e
+[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2
+[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
+[debug] Proxy map: {}
+...
+```
+
+**Do not post screenshots of verbose logs; only plain text is acceptable.**
+
+The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
+
+Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
+
+### Is the description of the issue itself sufficient?
+
+We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
+
+So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
+
+- What the problem is
+- How it could be fixed
+- What your proposed solution would look like
+
+If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
+
+For bug reports, this means that your report should contain the *complete* output of hypervideo when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
+
+If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
+
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
+
+### Are you using the latest version?
+
+Before reporting any issue, run `doas pacman -Syu hypervideo`. This should report that you're up-to-date. About 20% of the reports we receive concern issues that are already fixed, but people are using outdated versions. This goes for feature requests as well.
+
+### Is the issue already documented?
+
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+
+### Why are existing options not enough?
+
+Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+
+### Is there enough context in your bug report?
+
+People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) into a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: one simple, and one impossible (or extremely complicated).
+
+We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
+
+### Does the issue involve one problem, and one problem only?
+
+Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
+
+In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White House podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of hypervideo that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
+
+### Is anyone going to need the feature?
+
+Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+
+### Is your question about hypervideo?
+
+It may sound strange, but some bug reports we receive are completely unrelated to hypervideo and relate to a different, or even the reporter's own, application. Please make sure that you are actually using hypervideo. If you are using a UI for hypervideo, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for hypervideo fails in some way you believe is related to hypervideo, by all means, go ahead and report the bug.
+
+# COPYRIGHT
+
+hypervideo is released into the public domain under [CC0 1.0 Universal](LICENSE) by the copyright holders.
+
+This README file was originally written by [Daniel Bolton](https://github.com/dbbolton) and is likewise released into the public domain.
diff --git a/bin/hypervideo b/bin/hypervideo
new file mode 100755
index 0000000..73bf9b0
--- /dev/null
+++ b/bin/hypervideo
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+
+import hypervideo_dl
+
+if __name__ == '__main__':
+ hypervideo_dl.main()
diff --git a/devscripts/SizeOfImage.patch b/devscripts/SizeOfImage.patch
new file mode 100644
index 0000000..d5845af
--- /dev/null
+++ b/devscripts/SizeOfImage.patch
Binary files differ
diff --git a/devscripts/SizeOfImage_w.patch b/devscripts/SizeOfImage_w.patch
new file mode 100644
index 0000000..c1a338f
--- /dev/null
+++ b/devscripts/SizeOfImage_w.patch
Binary files differ
diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in
new file mode 100644
index 0000000..0cc81b0
--- /dev/null
+++ b/devscripts/bash-completion.in
@@ -0,0 +1,29 @@
+__hypervideo_dl()
+{
+ local cur prev opts fileopts diropts keywords
+ COMPREPLY=()
+ cur="${COMP_WORDS[COMP_CWORD]}"
+ prev="${COMP_WORDS[COMP_CWORD-1]}"
+ opts="{{flags}}"
+ keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
+ fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
+ diropts="--cache-dir"
+
+ if [[ ${prev} =~ ${fileopts} ]]; then
+ COMPREPLY=( $(compgen -f -- ${cur}) )
+ return 0
+ elif [[ ${prev} =~ ${diropts} ]]; then
+ COMPREPLY=( $(compgen -d -- ${cur}) )
+ return 0
+ fi
+
+ if [[ ${cur} =~ : ]]; then
+ COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
+ return 0
+ elif [[ ${cur} == * ]] ; then
+ COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
+ return 0
+ fi
+}
+
+complete -F __hypervideo_dl hypervideo
diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py
new file mode 100755
index 0000000..12abd45
--- /dev/null
+++ b/devscripts/bash-completion.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+import os
+from os.path import dirname as dirn
+import sys
+
+sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+import hypervideo_dl
+
+BASH_COMPLETION_FILE = "hypervideo.bash-completion"
+BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
+
+
+def build_completion(opt_parser):
+ opts_flag = []
+ for group in opt_parser.option_groups:
+ for option in group.option_list:
+ # for every long flag
+ opts_flag.append(option.get_opt_string())
+ with open(BASH_COMPLETION_TEMPLATE) as f:
+ template = f.read()
+ with open(BASH_COMPLETION_FILE, "w") as f:
+        # substitute the {{flags}} placeholder with the collected option flags
+ filled_template = template.replace("{{flags}}", " ".join(opts_flag))
+ f.write(filled_template)
+
+
+parser = hypervideo_dl.parseOpts()[0]
+build_completion(parser)
diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py
new file mode 100644
index 0000000..6f8aae1
--- /dev/null
+++ b/devscripts/buildserver.py
@@ -0,0 +1,433 @@
+#!/usr/bin/python3
+
+import argparse
+import ctypes
+import functools
+import shutil
+import subprocess
+import sys
+import tempfile
+import threading
+import traceback
+import os.path
+
+sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__)))))
+from hypervideo_dl.compat import (
+ compat_input,
+ compat_http_server,
+ compat_str,
+ compat_urlparse,
+)
+
+# These are not used outside of buildserver.py thus not in compat.py
+
+try:
+ import winreg as compat_winreg
+except ImportError: # Python 2
+ import _winreg as compat_winreg
+
+try:
+ import socketserver as compat_socketserver
+except ImportError: # Python 2
+ import SocketServer as compat_socketserver
+
+
+class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer):
+ allow_reuse_address = True
+
+
+advapi32 = ctypes.windll.advapi32
+
+SC_MANAGER_ALL_ACCESS = 0xf003f
+SC_MANAGER_CREATE_SERVICE = 0x02
+SERVICE_WIN32_OWN_PROCESS = 0x10
+SERVICE_AUTO_START = 0x2
+SERVICE_ERROR_NORMAL = 0x1
+DELETE = 0x00010000
+SERVICE_STATUS_START_PENDING = 0x00000002
+SERVICE_STATUS_RUNNING = 0x00000004
+SERVICE_ACCEPT_STOP = 0x1
+
+SVCNAME = 'youtubedl_builder'
+
+LPTSTR = ctypes.c_wchar_p
+START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR))
+
+
+class SERVICE_TABLE_ENTRY(ctypes.Structure):
+ _fields_ = [
+ ('lpServiceName', LPTSTR),
+ ('lpServiceProc', START_CALLBACK)
+ ]
+
+
+HandlerEx = ctypes.WINFUNCTYPE(
+ ctypes.c_int, # return
+ ctypes.c_int, # dwControl
+ ctypes.c_int, # dwEventType
+ ctypes.c_void_p, # lpEventData,
+ ctypes.c_void_p, # lpContext,
+)
+
+
+def _ctypes_array(c_type, py_array):
+ ar = (c_type * len(py_array))()
+ ar[:] = py_array
+ return ar
+
+
+def win_OpenSCManager():
+ res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS)
+ if not res:
+ raise Exception('Opening service manager failed - '
+ 'are you running this as administrator?')
+ return res
+
+
+def win_install_service(service_name, cmdline):
+ manager = win_OpenSCManager()
+ try:
+ h = advapi32.CreateServiceW(
+ manager, service_name, None,
+ SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS,
+ SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
+ cmdline, None, None, None, None, None)
+ if not h:
+ raise OSError('Service creation failed: %s' % ctypes.FormatError())
+
+ advapi32.CloseServiceHandle(h)
+ finally:
+ advapi32.CloseServiceHandle(manager)
+
+
+def win_uninstall_service(service_name):
+ manager = win_OpenSCManager()
+ try:
+ h = advapi32.OpenServiceW(manager, service_name, DELETE)
+ if not h:
+ raise OSError('Could not find service %s: %s' % (
+ service_name, ctypes.FormatError()))
+
+ try:
+ if not advapi32.DeleteService(h):
+ raise OSError('Deletion failed: %s' % ctypes.FormatError())
+ finally:
+ advapi32.CloseServiceHandle(h)
+ finally:
+ advapi32.CloseServiceHandle(manager)
+
+
+def win_service_report_event(service_name, msg, is_error=True):
+ with open('C:/sshkeys/log', 'a', encoding='utf-8') as f:
+ f.write(msg + '\n')
+
+ event_log = advapi32.RegisterEventSourceW(None, service_name)
+ if not event_log:
+ raise OSError('Could not report event: %s' % ctypes.FormatError())
+
+ try:
+ type_id = 0x0001 if is_error else 0x0004
+ event_id = 0xc0000000 if is_error else 0x40000000
+ lines = _ctypes_array(LPTSTR, [msg])
+
+ if not advapi32.ReportEventW(
+ event_log, type_id, 0, event_id, None, len(lines), 0,
+ lines, None):
+ raise OSError('Event reporting failed: %s' % ctypes.FormatError())
+ finally:
+ advapi32.DeregisterEventSource(event_log)
+
+
+def win_service_handler(stop_event, *args):
+ try:
+ raise ValueError('Handler called with args ' + repr(args))
+ TODO
+ except Exception as e:
+ tb = traceback.format_exc()
+ msg = str(e) + '\n' + tb
+ win_service_report_event(service_name, msg, is_error=True)
+ raise
+
+
+def win_service_set_status(handle, status_code):
+ svcStatus = SERVICE_STATUS()
+ svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS
+ svcStatus.dwCurrentState = status_code
+ svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP
+
+ svcStatus.dwServiceSpecificExitCode = 0
+
+ if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)):
+ raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError())
+
+
+def win_service_main(service_name, real_main, argc, argv_raw):
+ try:
+ # args = [argv_raw[i].value for i in range(argc)]
+ stop_event = threading.Event()
+        handler = HandlerEx(functools.partial(win_service_handler, stop_event))
+ h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
+ if not h:
+ raise OSError('Handler registration failed: %s' %
+ ctypes.FormatError())
+
+ TODO
+ except Exception as e:
+ tb = traceback.format_exc()
+ msg = str(e) + '\n' + tb
+ win_service_report_event(service_name, msg, is_error=True)
+ raise
+
+
+def win_service_start(service_name, real_main):
+ try:
+ cb = START_CALLBACK(
+ functools.partial(win_service_main, service_name, real_main))
+ dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [
+ SERVICE_TABLE_ENTRY(
+ service_name,
+ cb
+ ),
+ SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK))
+ ])
+
+ if not advapi32.StartServiceCtrlDispatcherW(dispatch_table):
+ raise OSError('ctypes start failed: %s' % ctypes.FormatError())
+ except Exception as e:
+ tb = traceback.format_exc()
+ msg = str(e) + '\n' + tb
+ win_service_report_event(service_name, msg, is_error=True)
+ raise
+
+
+def main(args=None):
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-i', '--install',
+ action='store_const', dest='action', const='install',
+ help='Launch at Windows startup')
+ parser.add_argument('-u', '--uninstall',
+ action='store_const', dest='action', const='uninstall',
+ help='Remove Windows service')
+ parser.add_argument('-s', '--service',
+ action='store_const', dest='action', const='service',
+ help='Run as a Windows service')
+ parser.add_argument('-b', '--bind', metavar='<host:port>',
+ action='store', default='0.0.0.0:8142',
+                        help='Bind to host:port (default %(default)s)')
+ options = parser.parse_args(args=args)
+
+ if options.action == 'install':
+ fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox')
+ cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind)
+ win_install_service(SVCNAME, cmdline)
+ return
+
+ if options.action == 'uninstall':
+ win_uninstall_service(SVCNAME)
+ return
+
+ if options.action == 'service':
+ win_service_start(SVCNAME, main)
+ return
+
+ host, port_str = options.bind.split(':')
+ port = int(port_str)
+
+ print('Listening on %s:%d' % (host, port))
+ srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
+ thr = threading.Thread(target=srv.serve_forever)
+ thr.start()
+ compat_input('Press ENTER to shut down')
+ srv.shutdown()
+ thr.join()
+
+
+def rmtree(path):
+ for name in os.listdir(path):
+ fname = os.path.join(path, name)
+ if os.path.isdir(fname):
+ rmtree(fname)
+ else:
+ os.chmod(fname, 0o666)
+ os.remove(fname)
+ os.rmdir(path)
+
+
+class BuildError(Exception):
+ def __init__(self, output, code=500):
+ self.output = output
+ self.code = code
+
+ def __str__(self):
+ return self.output
+
+
+class HTTPError(BuildError):
+ pass
+
+
+class PythonBuilder(object):
+ def __init__(self, **kwargs):
+ python_version = kwargs.pop('python', '3.4')
+ python_path = None
+ for node in ('Wow6432Node\\', ''):
+ try:
+ key = compat_winreg.OpenKey(
+ compat_winreg.HKEY_LOCAL_MACHINE,
+ r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version))
+ try:
+ python_path, _ = compat_winreg.QueryValueEx(key, '')
+ finally:
+ compat_winreg.CloseKey(key)
+ break
+ except Exception:
+ pass
+
+ if not python_path:
+ raise BuildError('No such Python version: %s' % python_version)
+
+ self.pythonPath = python_path
+
+ super(PythonBuilder, self).__init__(**kwargs)
+
+
+class GITInfoBuilder(object):
+ def __init__(self, **kwargs):
+ try:
+ self.user, self.repoName = kwargs['path'][:2]
+ self.rev = kwargs.pop('rev')
+ except ValueError:
+ raise BuildError('Invalid path')
+ except KeyError as e:
+ raise BuildError('Missing mandatory parameter "%s"' % e.args[0])
+
+ path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user)
+ if not os.path.exists(path):
+ os.makedirs(path)
+ self.basePath = tempfile.mkdtemp(dir=path)
+ self.buildPath = os.path.join(self.basePath, 'build')
+
+ super(GITInfoBuilder, self).__init__(**kwargs)
+
+
+class GITBuilder(GITInfoBuilder):
+ def build(self):
+ try:
+ subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath])
+ subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath)
+ except subprocess.CalledProcessError as e:
+ raise BuildError(e.output)
+
+ super(GITBuilder, self).build()
+
+
+class YoutubeDLBuilder(object):
+ authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile', 'ytdl-org']
+
+ def __init__(self, **kwargs):
+ if self.repoName != 'hypervideo':
+ raise BuildError('Invalid repository "%s"' % self.repoName)
+ if self.user not in self.authorizedUsers:
+ raise HTTPError('Unauthorized user "%s"' % self.user, 401)
+
+ super(YoutubeDLBuilder, self).__init__(**kwargs)
+
+ def build(self):
+ proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath)
+ proc.wait()
+ # Popen.wait() never raises CalledProcessError, so check the exit status explicitly
+ if proc.returncode != 0:
+ raise BuildError('py2exe build failed with exit code %d' % proc.returncode)
+
+ super(YoutubeDLBuilder, self).build()
+
+
+class DownloadBuilder(object):
+ def __init__(self, **kwargs):
+ self.handler = kwargs.pop('handler')
+ self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:]))
+ self.srcPath = os.path.abspath(os.path.normpath(self.srcPath))
+ if not self.srcPath.startswith(self.buildPath):
+ raise HTTPError(self.srcPath, 401)
+
+ super(DownloadBuilder, self).__init__(**kwargs)
+
+ def build(self):
+ if not os.path.exists(self.srcPath):
+ raise HTTPError('No such file', 404)
+ if os.path.isdir(self.srcPath):
+ raise HTTPError('Is a directory: %s' % self.srcPath, 401)
+
+ self.handler.send_response(200)
+ self.handler.send_header('Content-Type', 'application/octet-stream')
+ self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1])
+ self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size))
+ self.handler.end_headers()
+
+ with open(self.srcPath, 'rb') as src:
+ shutil.copyfileobj(src, self.handler.wfile)
+
+ super(DownloadBuilder, self).build()
+
+
+class CleanupTempDir(object):
+ def build(self):
+ try:
+ rmtree(self.basePath)
+ except Exception as e:
+ print('WARNING deleting "%s": %s' % (self.basePath, e))
+
+ super(CleanupTempDir, self).build()
+
+
+class Null(object):
+ def __init__(self, **kwargs):
+ pass
+
+ def start(self):
+ pass
+
+ def close(self):
+ pass
+
+ def build(self):
+ pass
+
+
+class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null):
+ pass
+
+
+class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching.
+
+ def do_GET(self):
+ path = compat_urlparse.urlparse(self.path)
+ paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()])
+ action, _, path = path.path.strip('/').partition('/')
+ if path:
+ path = path.split('/')
+ if action in self.actionDict:
+ try:
+ builder = self.actionDict[action](path=path, handler=self, **paramDict)
+ builder.start()
+ try:
+ builder.build()
+ finally:
+ builder.close()
+ except BuildError as e:
+ self.send_response(e.code)
+ msg = compat_str(e).encode('UTF-8')
+ self.send_header('Content-Type', 'text/plain; charset=UTF-8')
+ self.send_header('Content-Length', len(msg))
+ self.end_headers()
+ self.wfile.write(msg)
+ else:
+ self.send_response(500, 'Unknown build method "%s"' % action)
+ else:
+ self.send_response(500, 'Malformed URL')
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py
new file mode 100644
index 0000000..a62711e
--- /dev/null
+++ b/devscripts/check-porn.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+"""
+This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
+if we are not 'age_limit' tagging some porn site
+
+A second approach implemented relies on a list of porn domains, to activate it
+pass the list filename as the only argument
+"""
+
+# Allow direct execution
+import io
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import gettestcases
+from hypervideo_dl.utils import compat_urllib_parse_urlparse
+from hypervideo_dl.utils import compat_urllib_request
+
+if len(sys.argv) > 1:
+ METHOD = 'LIST'
+ with io.open(sys.argv[1], encoding='utf-8') as list_file:
+ LIST = list_file.read().strip()
+else:
+ METHOD = 'HEURISTIC'
+
+for test in gettestcases():
+ if METHOD == 'HEURISTIC':
+ try:
+ webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
+ except Exception:
+ print('\nFail: {0}'.format(test['name']))
+ continue
+
+ webpage = webpage.decode('utf8', 'replace')
+
+ RESULT = 'porn' in webpage.lower()
+
+ elif METHOD == 'LIST':
+ domain = compat_urllib_parse_urlparse(test['url']).netloc
+ if not domain:
+ print('\nFail: {0}'.format(test['name']))
+ continue
+ domain = '.'.join(domain.split('.')[-2:])
+
+ RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
+
+ if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
+ or test['info_dict']['age_limit'] != 18):
+ print('\nPotential missing age_limit check: {0}'.format(test['name']))
+
+ elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
+ and test['info_dict']['age_limit'] == 18):
+ print('\nPotential false negative: {0}'.format(test['name']))
+
+ else:
+ sys.stdout.write('.')
+ sys.stdout.flush()
+
+print()
diff --git a/devscripts/fish-completion.in b/devscripts/fish-completion.in
new file mode 100644
index 0000000..38579d7
--- /dev/null
+++ b/devscripts/fish-completion.in
@@ -0,0 +1,5 @@
+
+{{commands}}
+
+
+complete --command hypervideo --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
new file mode 100755
index 0000000..b6e9949
--- /dev/null
+++ b/devscripts/fish-completion.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+import optparse
+import os
+from os.path import dirname as dirn
+import sys
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+import hypervideo_dl
+from hypervideo_dl.utils import shell_quote
+
+FISH_COMPLETION_FILE = 'hypervideo.fish'
+FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
+
+EXTRA_ARGS = {
+ 'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'],
+
+ # Options that need a file parameter
+ 'download-archive': ['--require-parameter'],
+ 'cookies': ['--require-parameter'],
+ 'load-info': ['--require-parameter'],
+ 'batch-file': ['--require-parameter'],
+}
+
+
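+# build_completion() below renders one fish `complete` call per option, along
+# the lines of (illustrative; the real --description comes from the option's help text):
+#   complete --command hypervideo --long-option batch-file --short-option a --description '...' --require-parameter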
+def build_completion(opt_parser):
+ commands = []
+
+ for group in opt_parser.option_groups:
+ for option in group.option_list:
+ long_option = option.get_opt_string().strip('-')
+ complete_cmd = ['complete', '--command', 'hypervideo', '--long-option', long_option]
+ if option._short_opts:
+ complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
+ if option.help != optparse.SUPPRESS_HELP:
+ complete_cmd += ['--description', option.help]
+ complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
+ commands.append(shell_quote(complete_cmd))
+
+ with open(FISH_COMPLETION_TEMPLATE) as f:
+ template = f.read()
+ filled_template = template.replace('{{commands}}', '\n'.join(commands))
+ with open(FISH_COMPLETION_FILE, 'w') as f:
+ f.write(filled_template)
+
+
+parser = hypervideo_dl.parseOpts()[0]
+build_completion(parser)
diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py
new file mode 100644
index 0000000..00dc5bd
--- /dev/null
+++ b/devscripts/generate_aes_testdata.py
@@ -0,0 +1,43 @@
+from __future__ import unicode_literals
+
+import codecs
+import subprocess
+
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from hypervideo_dl.utils import intlist_to_bytes
+from hypervideo_dl.aes import aes_encrypt, key_expansion
+
+secret_msg = b'Secret message goes here'
+
+
+def hex_str(int_list):
+ return codecs.encode(intlist_to_bytes(int_list), 'hex')
+
+
+def openssl_encode(algo, key, iv):
+ cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
+ prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+ out, _ = prog.communicate(secret_msg)
+ return out
+
+
+iv = key = [0x20, 0x15] + 14 * [0]
+
+r = openssl_encode('aes-128-cbc', key, iv)
+print('aes_cbc_decrypt')
+print(repr(r))
+
+password = key
+new_key = aes_encrypt(password, key_expansion(password))
+r = openssl_encode('aes-128-ctr', new_key, iv)
+print('aes_decrypt_text 16')
+print(repr(r))
+
+password = key + 16 * [0]
+new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
+r = openssl_encode('aes-256-ctr', new_key, iv)
+print('aes_decrypt_text 32')
+print(repr(r))
diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py
new file mode 100644
index 0000000..c4e5fc1
--- /dev/null
+++ b/devscripts/lazy_load_template.py
@@ -0,0 +1,19 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+
+class LazyLoadExtractor(object):
+ _module = None
+
+ @classmethod
+ def ie_key(cls):
+ return cls.__name__[:-2]
+
+ def __new__(cls, *args, **kwargs):
+ mod = __import__(cls._module, fromlist=(cls.__name__,))
+ real_cls = getattr(mod, cls.__name__)
+ instance = real_cls.__new__(real_cls)
+ instance.__init__(*args, **kwargs)
+ return instance
diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py
new file mode 100755
index 0000000..dbc2e08
--- /dev/null
+++ b/devscripts/make_contributing.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+import io
+import optparse
+import re
+
+
+def main():
+ parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
+ options, args = parser.parse_args()
+ if len(args) != 2:
+ parser.error('Expected an input and an output filename')
+
+ infile, outfile = args
+
+ with io.open(infile, encoding='utf-8') as inf:
+ readme = inf.read()
+
+ bug_text = re.search(
+ r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
+ dev_text = re.search(
+ r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING HYPERVIDEO',
+ readme).group(1)
+
+ out = bug_text + dev_text
+
+ with io.open(outfile, 'w', encoding='utf-8') as outf:
+ outf.write(out)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
new file mode 100644
index 0000000..b9a851c
--- /dev/null
+++ b/devscripts/make_lazy_extractors.py
@@ -0,0 +1,100 @@
+from __future__ import unicode_literals, print_function
+
+from inspect import getsource
+import io
+import os
+from os.path import dirname as dirn
+import sys
+
+print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
+lazy_extractors_filename = sys.argv[1]
+if os.path.exists(lazy_extractors_filename):
+ os.remove(lazy_extractors_filename)
+
+from hypervideo_dl.extractor import _ALL_CLASSES
+from hypervideo_dl.extractor.common import InfoExtractor, SearchInfoExtractor
+
+with open('devscripts/lazy_load_template.py', 'rt') as f:
+ module_template = f.read()
+
+module_contents = [
+ module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
+ 'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
+
+ie_template = '''
+class {name}({bases}):
+ _VALID_URL = {valid_url!r}
+ _module = '{module}'
+'''
+
+make_valid_template = '''
+ @classmethod
+ def _make_valid_url(cls):
+ return {valid_url!r}
+'''
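+# The generated module thus contains one small stub per extractor, shaped
+# roughly like this (illustrative):
+#
+# class YoutubeIE(LazyLoadExtractor):
+#     _VALID_URL = '...'
+#     _module = 'hypervideo_dl.extractor.youtube'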
+
+
+def get_base_name(base):
+ if base is InfoExtractor:
+ return 'LazyLoadExtractor'
+ elif base is SearchInfoExtractor:
+ return 'LazyLoadSearchExtractor'
+ else:
+ return base.__name__
+
+
+def build_lazy_ie(ie, name):
+ valid_url = getattr(ie, '_VALID_URL', None)
+ s = ie_template.format(
+ name=name,
+ bases=', '.join(map(get_base_name, ie.__bases__)),
+ valid_url=valid_url,
+ module=ie.__module__)
+ if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
+ s += '\n' + getsource(ie.suitable)
+ if hasattr(ie, '_make_valid_url'):
+ # search extractors
+ s += make_valid_template.format(valid_url=ie._make_valid_url())
+ return s
+
+
+# find the correct sorting and add the required base classes so that subclasses
+# can be correctly created
+classes = _ALL_CLASSES[:-1]
+ordered_cls = []
+while classes:
+ for c in classes[:]:
+ bases = set(c.__bases__) - set((object, InfoExtractor, SearchInfoExtractor))
+ stop = False
+ for b in bases:
+ if b not in classes and b not in ordered_cls:
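+ # GenericIE is appended last, outside this loop; bail out if another extractor unexpectedly inherits from it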
+ if b.__name__ == 'GenericIE':
+ exit()
+ classes.insert(0, b)
+ stop = True
+ if stop:
+ break
+ if all(b in ordered_cls for b in bases):
+ ordered_cls.append(c)
+ classes.remove(c)
+ break
+ordered_cls.append(_ALL_CLASSES[-1])
+
+names = []
+for ie in ordered_cls:
+ name = ie.__name__
+ src = build_lazy_ie(ie, name)
+ module_contents.append(src)
+ if ie in _ALL_CLASSES:
+ names.append(name)
+
+module_contents.append(
+ '_ALL_CLASSES = [{0}]'.format(', '.join(names)))
+
+module_src = '\n'.join(module_contents) + '\n'
+
+with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
+ f.write(module_src)
diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
new file mode 100755
index 0000000..8fbce07
--- /dev/null
+++ b/devscripts/make_readme.py
@@ -0,0 +1,26 @@
+from __future__ import unicode_literals
+
+import io
+import sys
+import re
+
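+# This script expects the program's --help output on stdin and splices it into
+# README.md between the OPTIONS and CONFIGURATION headings, e.g. (illustrative):
+#   python -m hypervideo_dl --help | python devscripts/make_readme.py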
+README_FILE = 'README.md'
+helptext = sys.stdin.read()
+
+if isinstance(helptext, bytes):
+ helptext = helptext.decode('utf-8')
+
+with io.open(README_FILE, encoding='utf-8') as f:
+ oldreadme = f.read()
+
+header = oldreadme[:oldreadme.index('# OPTIONS')]
+footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
+
+options = helptext[helptext.index(' General Options:') + 19:]
+options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options)
+options = '# OPTIONS\n' + options + '\n'
+
+with io.open(README_FILE, 'w', encoding='utf-8') as f:
+ f.write(header)
+ f.write(options)
+ f.write(footer)
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
new file mode 100644
index 0000000..09807b0
--- /dev/null
+++ b/devscripts/make_supportedsites.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+import io
+import optparse
+import os
+import sys
+
+
+# Import hypervideo_dl
+ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
+sys.path.insert(0, ROOT_DIR)
+import hypervideo_dl
+
+
+def main():
+ parser = optparse.OptionParser(usage='%prog OUTFILE.md')
+ options, args = parser.parse_args()
+ if len(args) != 1:
+ parser.error('Expected an output filename')
+
+ outfile, = args
+
+ def gen_ies_md(ies):
+ for ie in ies:
+ ie_md = '**{0}**'.format(ie.IE_NAME)
+ ie_desc = getattr(ie, 'IE_DESC', None)
+ if ie_desc is False:
+ continue
+ if ie_desc is not None:
+ ie_md += ': {0}'.format(ie.IE_DESC)
+ if not ie.working():
+ ie_md += ' (Currently broken)'
+ yield ie_md
+
+ ies = sorted(hypervideo_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower())
+ out = '# Supported sites\n' + ''.join(
+ ' - ' + md + '\n'
+ for md in gen_ies_md(ies))
+
+ with io.open(outfile, 'w', encoding='utf-8') as outf:
+ outf.write(out)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/posix-locale.sh b/devscripts/posix-locale.sh
new file mode 100755
index 0000000..0aa7a59
--- /dev/null
+++ b/devscripts/posix-locale.sh
@@ -0,0 +1,6 @@
+
+# source this file in your shell to get a POSIX locale (which will break many programs, but that's kind of the point)
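+# Usage: . devscripts/posix-locale.sh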
+
+export LC_ALL=POSIX
+export LANG=POSIX
+export LANGUAGE=POSIX
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
new file mode 100644
index 0000000..5b74238
--- /dev/null
+++ b/devscripts/prepare_manpage.py
@@ -0,0 +1,79 @@
+from __future__ import unicode_literals
+
+import io
+import optparse
+import os.path
+import re
+
+ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+README_FILE = os.path.join(ROOT_DIR, 'README.md')
+
+PREFIX = r'''%HYPERVIDEO(1)
+
+# NAME
+
+hypervideo \- download videos from youtube.com or other video platforms
+
+# SYNOPSIS
+
+**hypervideo** \[OPTIONS\] URL [URL...]
+
+'''
+
+
+def main():
+ parser = optparse.OptionParser(usage='%prog OUTFILE.md')
+ options, args = parser.parse_args()
+ if len(args) != 1:
+ parser.error('Expected an output filename')
+
+ outfile, = args
+
+ with io.open(README_FILE, encoding='utf-8') as f:
+ readme = f.read()
+
+ readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
+ readme = re.sub(r'\s+hypervideo \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
+ readme = PREFIX + readme
+
+ readme = filter_options(readme)
+
+ with io.open(outfile, 'w', encoding='utf-8') as outf:
+ outf.write(readme)
+
+
+def filter_options(readme):
+ ret = ''
+ in_options = False
+ for line in readme.split('\n'):
+ if line.startswith('# '):
+ if line[2:].startswith('OPTIONS'):
+ in_options = True
+ else:
+ in_options = False
+
+ if in_options:
+ if line.lstrip().startswith('-'):
+ split = re.split(r'\s{2,}', line.lstrip())
+ # The description string may start with `-` as well. If there is
+ # only one piece, it is a description, not an option.
+ if len(split) > 1:
+ option, description = split
+ split_option = option.split(' ')
+
+ if not split_option[-1].startswith('-'): # metavar
+ option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
+
+ # Pandoc's definition_lists. See http://pandoc.org/README.html
+ # for more information.
+ ret += '\n%s\n: %s\n' % (option, description)
+ continue
+ ret += line.lstrip() + '\n'
+ else:
+ ret += line + '\n'
+
+ return ret
+
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/run_tests.bat b/devscripts/run_tests.bat
new file mode 100644
index 0000000..01a79b6
--- /dev/null
+++ b/devscripts/run_tests.bat
@@ -0,0 +1,17 @@
+@echo off
+
+rem Keep this list in sync with the `offlinetest` target in Makefile
+set DOWNLOAD_TESTS="age_restriction^|download^|socks^|subtitles^|write_annotations^|youtube_lists^|youtube_signature"
+
+if "%YTDL_TEST_SET%" == "core" (
+ set test_set="-I test_("%DOWNLOAD_TESTS%")\.py"
+ set multiprocess_args=""
+) else if "%YTDL_TEST_SET%" == "download" (
+ set test_set="-I test_(?!"%DOWNLOAD_TESTS%").+\.py"
+ set multiprocess_args="--processes=4 --process-timeout=540"
+) else (
+ echo YTDL_TEST_SET is not set or invalid
+ exit /b 1
+)
+
+nosetests test --verbose %test_set:"=% %multiprocess_args:"=%
diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh
new file mode 100755
index 0000000..b8f48b9
--- /dev/null
+++ b/devscripts/run_tests.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
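+# Select the test set via the environment, e.g. (illustrative):
+#   YTDL_TEST_SET=core ./devscripts/run_tests.sh
+#   YTDL_TEST_SET=download ./devscripts/run_tests.sh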
+# Keep this list in sync with the `offlinetest` target in Makefile
+DOWNLOAD_TESTS="age_restriction|download|socks|subtitles|write_annotations|youtube_lists|youtube_signature"
+
+test_set=""
+multiprocess_args=""
+
+case "$YTDL_TEST_SET" in
+ core)
+ test_set="-I test_($DOWNLOAD_TESTS)\.py"
+ ;;
+ download)
+ test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py"
+ multiprocess_args="--processes=4 --process-timeout=540"
+ ;;
+ *)
+ ;;
+esac
+
+nosetests test --verbose $test_set $multiprocess_args
diff --git a/devscripts/zsh-completion.in b/devscripts/zsh-completion.in
new file mode 100644
index 0000000..1906949
--- /dev/null
+++ b/devscripts/zsh-completion.in
@@ -0,0 +1,28 @@
+#compdef hypervideo
+
+__hypervideo_dl() {
+ local curcontext="$curcontext" fileopts diropts cur prev
+ typeset -A opt_args
+ fileopts="{{fileopts}}"
+ diropts="{{diropts}}"
+ cur=$words[CURRENT]
+ case $cur in
+ :)
+ _arguments '*: :(::ytfavorites ::ytrecommended ::ytsubscriptions ::ytwatchlater ::ythistory)'
+ ;;
+ *)
+ prev=$words[CURRENT-1]
+ if [[ ${prev} =~ ${fileopts} ]]; then
+ _path_files
+ elif [[ ${prev} =~ ${diropts} ]]; then
+ _path_files -/
+ elif [[ ${prev} == "--recode-video" ]]; then
+ _arguments '*: :(mp4 flv ogg webm mkv)'
+ else
+ _arguments '*: :({{flags}})'
+ fi
+ ;;
+ esac
+}
+
+__hypervideo_dl
\ No newline at end of file
diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py
new file mode 100755
index 0000000..b570469
--- /dev/null
+++ b/devscripts/zsh-completion.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+import os
+from os.path import dirname as dirn
+import sys
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+import hypervideo_dl
+
+ZSH_COMPLETION_FILE = "hypervideo.zsh"
+ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"
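+# Run this script from the repository root so the relative template path
+# resolves, e.g. (illustrative): python devscripts/zsh-completion.py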
+
+
+def build_completion(opt_parser):
+ opts = [opt for group in opt_parser.option_groups
+ for opt in group.option_list]
+ opts_file = [opt for opt in opts if opt.metavar == "FILE"]
+ opts_dir = [opt for opt in opts if opt.metavar == "DIR"]
+
+ fileopts = []
+ for opt in opts_file:
+ if opt._short_opts:
+ fileopts.extend(opt._short_opts)
+ if opt._long_opts:
+ fileopts.extend(opt._long_opts)
+
+ diropts = []
+ for opt in opts_dir:
+ if opt._short_opts:
+ diropts.extend(opt._short_opts)
+ if opt._long_opts:
+ diropts.extend(opt._long_opts)
+
+ flags = [opt.get_opt_string() for opt in opts]
+
+ with open(ZSH_COMPLETION_TEMPLATE) as f:
+ template = f.read()
+
+ template = template.replace("{{fileopts}}", "|".join(fileopts))
+ template = template.replace("{{diropts}}", "|".join(diropts))
+ template = template.replace("{{flags}}", " ".join(flags))
+
+ with open(ZSH_COMPLETION_FILE, "w") as f:
+ f.write(template)
+
+
+parser = hypervideo_dl.parseOpts()[0]
+build_completion(parser)
diff --git a/docs/.gitignore b/docs/.gitignore
new file mode 100644
index 0000000..69fa449
--- /dev/null
+++ b/docs/.gitignore
@@ -0,0 +1 @@
+_build/
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..805682f
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,177 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+PAPER =
+BUILDDIR = _build
+
+# User-friendly check for sphinx-build
+ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
+
+# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
+
+help:
+ @echo "Please use \`make <target>' where <target> is one of"
+ @echo " html to make standalone HTML files"
+ @echo " dirhtml to make HTML files named index.html in directories"
+ @echo " singlehtml to make a single large HTML file"
+ @echo " pickle to make pickle files"
+ @echo " json to make JSON files"
+ @echo " htmlhelp to make HTML files and a HTML help project"
+ @echo " qthelp to make HTML files and a qthelp project"
+ @echo " devhelp to make HTML files and a Devhelp project"
+ @echo " epub to make an epub"
+ @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+ @echo " latexpdf to make LaTeX files and run them through pdflatex"
+ @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
+ @echo " text to make text files"
+ @echo " man to make manual pages"
+ @echo " texinfo to make Texinfo files"
+ @echo " info to make Texinfo files and run them through makeinfo"
+ @echo " gettext to make PO message catalogs"
+ @echo " changes to make an overview of all changed/added/deprecated items"
+ @echo " xml to make Docutils-native XML files"
+ @echo " pseudoxml to make pseudoxml-XML files for display purposes"
+ @echo " linkcheck to check all external links for integrity"
+ @echo " doctest to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+ rm -rf $(BUILDDIR)/*
+
+html:
+ $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+ $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+ $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+ @echo
+ @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+ $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+ @echo
+ @echo "Build finished; now you can process the pickle files."
+
+json:
+ $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+ @echo
+ @echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+ $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+ @echo
+ @echo "Build finished; now you can run HTML Help Workshop with the" \
+ ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+ $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+ @echo
+ @echo "Build finished; now you can run "qcollectiongenerator" with the" \
+ ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+ @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/hypervideo.qhcp"
+ @echo "To view the help file:"
+ @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/hypervideo.qhc"
+
+devhelp:
+ $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+ @echo
+ @echo "Build finished."
+ @echo "To view the help file:"
+ @echo "# mkdir -p $$HOME/.local/share/devhelp/hypervideo"
+ @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/hypervideo"
+ @echo "# devhelp"
+
+epub:
+ $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+ @echo
+ @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo
+ @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+ @echo "Run \`make' in that directory to run these through (pdf)latex" \
+ "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through pdflatex..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+latexpdfja:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through platex and dvipdfmx..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+ $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+ @echo
+ @echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+ $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+ @echo
+ @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo
+ @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+ @echo "Run \`make' in that directory to run these through makeinfo" \
+ "(use \`make info' here to do that automatically)."
+
+info:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo "Running Texinfo files through makeinfo..."
+ make -C $(BUILDDIR)/texinfo info
+ @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+ $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+ @echo
+ @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+ $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+ @echo
+ @echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+ $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+ @echo
+ @echo "Link check complete; look for any errors in the above output " \
+ "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+ $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+ @echo "Testing of doctests in the sources finished, look at the " \
+ "results in $(BUILDDIR)/doctest/output.txt."
+
+xml:
+ $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
+ @echo
+ @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
+
+pseudoxml:
+ $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
+ @echo
+ @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..4b48a85
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+#
+# hypervideo documentation build configuration file, created by
+# sphinx-quickstart on Fri Mar 14 21:05:43 2014.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys
+import os
+# Allow importing hypervideo_dl
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+# -- General configuration ------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ 'sphinx.ext.autodoc',
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'hypervideo'
+copyright = u'2014, Ricardo Garcia Gonzalez'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+from hypervideo_dl.version import __version__
+version = __version__
+# The full version, including alpha/beta/rc tags.
+release = version
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = ['_build']
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+html_theme = 'default'
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'hypervideodoc'
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..93b8704
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,23 @@
+Welcome to hypervideo's documentation!
+======================================
+
+*hypervideo* is a command-line program to download videos from YouTube.com and many other sites.
+It can also be used in Python code.
+
+Developer guide
+---------------
+
+This section contains information for using *hypervideo* from Python programs.
+
+.. toctree::
+ :maxdepth: 2
+
+ module_guide
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
diff --git a/docs/module_guide.rst b/docs/module_guide.rst
new file mode 100644
index 0000000..a27bc8f
--- /dev/null
+++ b/docs/module_guide.rst
@@ -0,0 +1,67 @@
+Using the ``hypervideo_dl`` module
+==================================
+
+When using the ``hypervideo_dl`` module, you start by creating an instance of :class:`YoutubeDL` and adding all the available extractors:
+
+.. code-block:: python
+
+ >>> from hypervideo_dl import YoutubeDL
+ >>> ydl = YoutubeDL()
+ >>> ydl.add_default_info_extractors()
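+
+You can also pass an options dictionary when constructing the object; a minimal
+sketch, assuming standard ``YoutubeDL`` options such as ``quiet``:
+
+.. code-block:: python
+
+ >>> ydl = YoutubeDL({'quiet': True})
+ >>> ydl.add_default_info_extractors()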
+
+Extracting video information
+----------------------------
+
+Use the :meth:`YoutubeDL.extract_info` method to get the video information; it returns a dictionary:
+
+.. code-block:: python
+
+ >>> info = ydl.extract_info('http://www.youtube.com/watch?v=BaW_jenozKc', download=False)
+ [youtube] Setting language
+ [youtube] BaW_jenozKc: Downloading webpage
+ [youtube] BaW_jenozKc: Downloading video info webpage
+ [youtube] BaW_jenozKc: Extracting video information
+ >>> info['title']
+ 'hypervideo test video "\'/\\ä↭𝕐'
+ >>> info['height'], info['width']
+ (720, 1280)
+
+If you want to download or play the video yourself, you can get its URL:
+
+.. code-block:: python
+
+ >>> info['url']
+ 'https://...'
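+
+If you would rather have *hypervideo* perform the download itself, you can pass
+a list of URLs to :meth:`YoutubeDL.download` (a minimal sketch; the output file
+name follows your configured options):
+
+.. code-block:: python
+
+ >>> ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])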
+
+Extracting playlist information
+-------------------------------
+
+The playlist information is extracted in a similar way, but the dictionary is a bit different:
+
+.. code-block:: python
+
+ >>> playlist = ydl.extract_info('http://www.ted.com/playlists/13/open_source_open_world', download=False)
+ [TED] open_source_open_world: Downloading playlist webpage
+ ...
+ >>> playlist['title']
+ 'Open-source, open world'
+
+You can access the videos in the playlist with the ``entries`` field:
+
+.. code-block:: python
+
+ >>> for video in playlist['entries']:
+ ... print('Video #%d: %s' % (video['playlist_index'], video['title']))
+
+ Video #1: How Arduino is open-sourcing imagination
+ Video #2: The year open data went worldwide
+ Video #3: Massive-scale online collaboration
+ Video #4: The art of asking
+ Video #5: How cognitive surplus will change the world
+ Video #6: The birth of Wikipedia
+ Video #7: Coding a better government
+ Video #8: The era of open innovation
+ Video #9: The currency of the new economy is trust
+
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
new file mode 100644
index 0000000..ed0d5e9
--- /dev/null
+++ b/docs/supportedsites.md
@@ -0,0 +1,1228 @@
+# Supported sites
+ - **1tv**: Первый канал
+ - **20min**
+ - **220.ro**
+ - **23video**
+ - **247sports**
+ - **24video**
+ - **3qsdn**: 3Q SDN
+ - **3sat**
+ - **4tube**
+ - **56.com**
+ - **5min**
+ - **6play**
+ - **7plus**
+ - **8tracks**
+ - **91porn**
+ - **9c9media**
+ - **9gag**
+ - **9now.com.au**
+ - **abc.net.au**
+ - **abc.net.au:iview**
+ - **abcnews**
+ - **abcnews:video**
+ - **abcotvs**: ABC Owned Television Stations
+ - **abcotvs:clips**
+ - **AcademicEarth:Course**
+ - **acast**
+ - **acast:channel**
+ - **ADN**: Anime Digital Network
+ - **AdobeConnect**
+ - **adobetv**
+ - **adobetv:channel**
+ - **adobetv:embed**
+ - **adobetv:show**
+ - **adobetv:video**
+ - **AdultSwim**
+ - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
+ - **aenetworks:collection**
+ - **aenetworks:show**
+ - **afreecatv**: afreecatv.com
+ - **AirMozilla**
+ - **AliExpressLive**
+ - **AlJazeera**
+ - **Allocine**
+ - **AlphaPorno**
+ - **Amara**
+ - **AMCNetworks**
+ - **AmericasTestKitchen**
+ - **AmericasTestKitchenSeason**
+ - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
+ - **AnimeOnDemand**
+ - **Anvato**
+ - **aol.com**: Yahoo screen and movies
+ - **APA**
+ - **Aparat**
+ - **AppleConnect**
+ - **AppleDaily**: 臺灣蘋果日報
+ - **ApplePodcasts**
+ - **appletrailers**
+ - **appletrailers:section**
+ - **archive.org**: archive.org videos
+ - **ArcPublishing**
+ - **ARD**
+ - **ARD:mediathek**
+ - **ARDBetaMediathek**
+ - **Arkena**
+ - **arte.sky.it**
+ - **ArteTV**
+ - **ArteTVEmbed**
+ - **ArteTVPlaylist**
+ - **AsianCrush**
+ - **AsianCrushPlaylist**
+ - **AtresPlayer**
+ - **ATTTechChannel**
+ - **ATVAt**
+ - **AudiMedia**
+ - **AudioBoom**
+ - **audiomack**
+ - **audiomack:album**
+ - **AWAAN**
+ - **awaan:live**
+ - **awaan:season**
+ - **awaan:video**
+ - **AZMedien**: AZ Medien videos
+ - **BaiduVideo**: 百度视频
+ - **bandaichannel**
+ - **Bandcamp**
+ - **Bandcamp:album**
+ - **Bandcamp:weekly**
+ - **bangumi.bilibili.com**: BiliBili番剧
+ - **bbc**: BBC
+ - **bbc.co.uk**: BBC iPlayer
+ - **bbc.co.uk:article**: BBC articles
+ - **bbc.co.uk:iplayer:episodes**
+ - **bbc.co.uk:iplayer:group**
+ - **bbc.co.uk:playlist**
+ - **BBVTV**
+ - **Beatport**
+ - **Beeg**
+ - **BehindKink**
+ - **Bellator**
+ - **BellMedia**
+ - **Bet**
+ - **bfi:player**
+ - **bfmtv**
+ - **bfmtv:article**
+ - **bfmtv:live**
+ - **BibelTV**
+ - **Bigflix**
+ - **Bild**: Bild.de
+ - **BiliBili**
+ - **BilibiliAudio**
+ - **BilibiliAudioAlbum**
+ - **BiliBiliPlayer**
+ - **BioBioChileTV**
+ - **Biography**
+ - **BIQLE**
+ - **BitChute**
+ - **BitChuteChannel**
+ - **BleacherReport**
+ - **BleacherReportCMS**
+ - **Bloomberg**
+ - **BokeCC**
+ - **BongaCams**
+ - **BostonGlobe**
+ - **Box**
+ - **Bpb**: Bundeszentrale für politische Bildung
+ - **BR**: Bayerischer Rundfunk
+ - **BravoTV**
+ - **Break**
+ - **brightcove:legacy**
+ - **brightcove:new**
+ - **BRMediathek**: Bayerischer Rundfunk Mediathek
+ - **bt:article**: Bergens Tidende Articles
+ - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
+ - **BusinessInsider**
+ - **BuzzFeed**
+ - **BYUtv**
+ - **Camdemy**
+ - **CamdemyFolder**
+ - **CamModels**
+ - **CamTube**
+ - **CamWithHer**
+ - **canalc2.tv**
+ - **Canalplus**: mycanal.fr and piwiplus.fr
+ - **Canvas**
+ - **CanvasEen**: canvas.be and een.be
+ - **CarambaTV**
+ - **CarambaTVPage**
+ - **CartoonNetwork**
+ - **cbc.ca**
+ - **cbc.ca:olympics**
+ - **cbc.ca:player**
+ - **cbc.ca:watch**
+ - **cbc.ca:watch:video**
+ - **CBS**
+ - **CBSInteractive**
+ - **CBSLocal**
+ - **CBSLocalArticle**
+ - **cbsnews**: CBS News
+ - **cbsnews:embed**
+ - **cbsnews:livevideo**: CBS News Live Videos
+ - **cbssports**
+ - **cbssports:embed**
+ - **CCMA**
+ - **CCTV**: 央视网
+ - **CDA**
+ - **CeskaTelevize**
+ - **CeskaTelevizePorady**
+ - **channel9**: Channel 9
+ - **CharlieRose**
+ - **Chaturbate**
+ - **Chilloutzone**
+ - **chirbit**
+ - **chirbit:profile**
+ - **cielotv.it**
+ - **Cinchcast**
+ - **Cinemax**
+ - **CiscoLiveSearch**
+ - **CiscoLiveSession**
+ - **CJSW**
+ - **cliphunter**
+ - **Clippit**
+ - **ClipRs**
+ - **Clipsyndicate**
+ - **CloserToTruth**
+ - **CloudflareStream**
+ - **Cloudy**
+ - **Clubic**
+ - **Clyp**
+ - **cmt.com**
+ - **CNBC**
+ - **CNBCVideo**
+ - **CNN**
+ - **CNNArticle**
+ - **CNNBlogs**
+ - **ComedyCentral**
+ - **ComedyCentralTV**
+ - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
+ - **CONtv**
+ - **Corus**
+ - **Coub**
+ - **Cracked**
+ - **Crackle**
+ - **CrooksAndLiars**
+ - **crunchyroll**
+ - **crunchyroll:playlist**
+ - **CSpan**: C-SPAN
+ - **CtsNews**: 華視新聞
+ - **CTV**
+ - **CTVNews**
+ - **cu.ntv.co.jp**: Nippon Television Network
+ - **Culturebox**
+ - **CultureUnplugged**
+ - **curiositystream**
+ - **curiositystream:collection**
+ - **CWTV**
+ - **DagelijkseKost**: dagelijksekost.een.be
+ - **DailyMail**
+ - **dailymotion**
+ - **dailymotion:playlist**
+ - **dailymotion:user**
+ - **daum.net**
+ - **daum.net:clip**
+ - **daum.net:playlist**
+ - **daum.net:user**
+ - **DBTV**
+ - **DctpTv**
+ - **DeezerPlaylist**
+ - **defense.gouv.fr**
+ - **democracynow**
+ - **DHM**: Filmarchiv - Deutsches Historisches Museum
+ - **Digg**
+ - **DigitallySpeaking**
+ - **Digiteka**
+ - **Discovery**
+ - **DiscoveryGo**
+ - **DiscoveryGoPlaylist**
+ - **DiscoveryNetworksDe**
+ - **DiscoveryPlus**
+ - **DiscoveryVR**
+ - **Disney**
+ - **dlive:stream**
+ - **dlive:vod**
+ - **Dotsub**
+ - **DouyuShow**
+ - **DouyuTV**: 斗鱼
+ - **DPlay**
+ - **DRBonanza**
+ - **Dropbox**
+ - **DrTuber**
+ - **drtv**
+ - **drtv:live**
+ - **DTube**
+ - **Dumpert**
+ - **dvtv**: http://video.aktualne.cz/
+ - **dw**
+ - **dw:article**
+ - **EaglePlatform**
+ - **EbaumsWorld**
+ - **EchoMsk**
+ - **egghead:course**: egghead.io course
+ - **egghead:lesson**: egghead.io lesson
+ - **ehftv**
+ - **eHow**
+ - **EinsUndEinsTV**
+ - **Einthusan**
+ - **eitb.tv**
+ - **EllenTube**
+ - **EllenTubePlaylist**
+ - **EllenTubeVideo**
+ - **ElPais**: El País
+ - **Embedly**
+ - **EMPFlix**
+ - **Engadget**
+ - **Eporner**
+ - **EroProfile**
+ - **Escapist**
+ - **ESPN**
+ - **ESPNArticle**
+ - **EsriVideo**
+ - **Europa**
+ - **EWETV**
+ - **ExpoTV**
+ - **Expressen**
+ - **ExtremeTube**
+ - **EyedoTV**
+ - **facebook**
+ - **FacebookPluginsVideo**
+ - **faz.net**
+ - **fc2**
+ - **fc2:embed**
+ - **Fczenit**
+ - **filmon**
+ - **filmon:channel**
+ - **Filmweb**
+ - **FiveThirtyEight**
+ - **FiveTV**
+ - **Flickr**
+ - **Folketinget**: Folketinget (ft.dk; Danish parliament)
+ - **FootyRoom**
+ - **Formula1**
+ - **FOX**
+ - **FOX9**
+ - **FOX9News**
+ - **Foxgay**
+ - **foxnews**: Fox News and Fox Business Video
+ - **foxnews:article**
+ - **FoxSports**
+ - **france2.fr:generation-what**
+ - **FranceCulture**
+ - **FranceInter**
+ - **FranceTV**
+ - **FranceTVEmbed**
+ - **francetvinfo.fr**
+ - **FranceTVJeunesse**
+ - **FranceTVSite**
+ - **Freesound**
+ - **freespeech.org**
+ - **FreshLive**
+ - **FrontendMasters**
+ - **FrontendMastersCourse**
+ - **FrontendMastersLesson**
+ - **FujiTVFODPlus7**
+ - **Funimation**
+ - **Funk**
+ - **Fusion**
+ - **Fux**
+ - **Gaia**
+ - **GameInformer**
+ - **GameSpot**
+ - **GameStar**
+ - **Gaskrank**
+ - **Gazeta**
+ - **GDCVault**
+ - **GediDigital**
+ - **generic**: Generic downloader that works on some sites
+ - **Gfycat**
+ - **GiantBomb**
+ - **Giga**
+ - **GlattvisionTV**
+ - **Glide**: Glide mobile video messages (glide.me)
+ - **Globo**
+ - **GloboArticle**
+ - **Go**
+ - **GodTube**
+ - **Golem**
+ - **google:podcasts**
+ - **google:podcasts:feed**
+ - **GoogleDrive**
+ - **Goshgay**
+ - **GPUTechConf**
+ - **Groupon**
+ - **hbo**
+ - **HearThisAt**
+ - **Heise**
+ - **HellPorno**
+ - **Helsinki**: helsinki.fi
+ - **HentaiStigma**
+ - **hetklokhuis**
+ - **hgtv.com:show**
+ - **HGTVDe**
+ - **HiDive**
+ - **HistoricFilms**
+ - **history:player**
+ - **history:topic**: History.com Topic
+ - **hitbox**
+ - **hitbox:live**
+ - **HitRecord**
+ - **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
+ - **HornBunny**
+ - **HotNewHipHop**
+ - **hotstar**
+ - **hotstar:playlist**
+ - **Howcast**
+ - **HowStuffWorks**
+ - **HRTi**
+ - **HRTiPlaylist**
+ - **Huajiao**: 花椒直播
+ - **HuffPost**: Huffington Post
+ - **Hungama**
+ - **HungamaSong**
+ - **Hypem**
+ - **ign.com**
+ - **IGNArticle**
+ - **IGNVideo**
+ - **IHeartRadio**
+ - **iheartradio:podcast**
+ - **imdb**: Internet Movie Database trailers
+ - **imdb:list**: Internet Movie Database lists
+ - **Imgur**
+ - **imgur:album**
+ - **imgur:gallery**
+ - **Ina**
+ - **Inc**
+ - **IndavideoEmbed**
+ - **InfoQ**
+ - **Instagram**
+ - **instagram:tag**: Instagram hashtag search
+ - **instagram:user**: Instagram user profile
+ - **Internazionale**
+ - **InternetVideoArchive**
+ - **IPrima**
+ - **iqiyi**: 爱奇艺
+ - **Ir90Tv**
+ - **ITTF**
+ - **ITV**
+ - **ITVBTCC**
+ - **ivi**: ivi.ru
+ - **ivi:compilation**: ivi.ru compilations
+ - **ivideon**: Ivideon TV
+ - **Iwara**
+ - **Izlesene**
+ - **Jamendo**
+ - **JamendoAlbum**
+ - **JeuxVideo**
+ - **Joj**
+ - **Jove**
+ - **JWPlatform**
+ - **Kakao**
+ - **Kaltura**
+ - **Kankan**
+ - **Karaoketv**
+ - **KarriereVideos**
+ - **Katsomo**
+ - **KeezMovies**
+ - **Ketnet**
+ - **khanacademy**
+ - **khanacademy:unit**
+ - **KickStarter**
+ - **KinjaEmbed**
+ - **KinoPoisk**
+ - **KonserthusetPlay**
+ - **KrasView**: Красвью
+ - **Ku6**
+ - **KUSI**
+ - **kuwo:album**: 酷我音乐 - 专辑
+ - **kuwo:category**: 酷我音乐 - 分类
+ - **kuwo:chart**: 酷我音乐 - 排行榜
+ - **kuwo:mv**: 酷我音乐 - MV
+ - **kuwo:singer**: 酷我音乐 - 歌手
+ - **kuwo:song**: 酷我音乐
+ - **la7.it**
+ - **laola1tv**
+ - **laola1tv:embed**
+ - **lbry**
+ - **lbry:channel**
+ - **LCI**
+ - **Lcp**
+ - **LcpPlay**
+ - **Le**: 乐视网
+ - **Lecture2Go**
+ - **Lecturio**
+ - **LecturioCourse**
+ - **LecturioDeCourse**
+ - **LEGO**
+ - **Lemonde**
+ - **Lenta**
+ - **LePlaylist**
+ - **LetvCloud**: 乐视云
+ - **Libsyn**
+ - **life**: Life.ru
+ - **life:embed**
+ - **limelight**
+ - **limelight:channel**
+ - **limelight:channel_list**
+ - **LineLive**
+ - **LineLiveChannel**
+ - **LineTV**
+ - **linkedin:learning**
+ - **linkedin:learning:course**
+ - **LinuxAcademy**
+ - **LiTV**
+ - **LiveJournal**
+ - **LiveLeak**
+ - **LiveLeakEmbed**
+ - **livestream**
+ - **livestream:original**
+ - **LnkGo**
+ - **loc**: Library of Congress
+ - **LocalNews8**
+ - **LoveHomePorn**
+ - **lrt.lt**
+ - **lynda**: lynda.com videos
+ - **lynda:course**: lynda.com online courses
+ - **m6**
+ - **mailru**: Видео@Mail.Ru
+ - **mailru:music**: Музыка@Mail.Ru
+ - **mailru:music:search**: Музыка@Mail.Ru
+ - **MallTV**
+ - **mangomolo:live**
+ - **mangomolo:video**
+ - **ManyVids**
+ - **MaoriTV**
+ - **Markiza**
+ - **MarkizaPage**
+ - **massengeschmack.tv**
+ - **MatchTV**
+ - **MDR**: MDR.DE and KiKA
+ - **MedalTV**
+ - **media.ccc.de**
+ - **media.ccc.de:lists**
+ - **Medialaan**
+ - **Mediaset**
+ - **Mediasite**
+ - **MediasiteCatalog**
+ - **MediasiteNamedCatalog**
+ - **Medici**
+ - **megaphone.fm**: megaphone.fm embedded players
+ - **Meipai**: 美拍
+ - **MelonVOD**
+ - **META**
+ - **metacafe**
+ - **Metacritic**
+ - **mewatch**
+ - **Mgoon**
+ - **MGTV**: 芒果TV
+ - **MiaoPai**
+ - **minds**
+ - **minds:channel**
+ - **minds:group**
+ - **MinistryGrid**
+ - **Minoto**
+ - **miomio.tv**
+ - **MiTele**: mitele.es
+ - **mixcloud**
+ - **mixcloud:playlist**
+ - **mixcloud:user**
+ - **MLB**
+ - **MLBVideo**
+ - **Mnet**
+ - **MNetTV**
+ - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
+ - **Mofosex**
+ - **MofosexEmbed**
+ - **Mojvideo**
+ - **Morningstar**: morningstar.com
+ - **Motherless**
+ - **MotherlessGroup**
+ - **Motorsport**: motorsport.com
+ - **MovieClips**
+ - **MovieFap**
+ - **Moviezine**
+ - **MovingImage**
+ - **MSN**
+ - **mtg**: MTG services
+ - **mtv**
+ - **mtv.de**
+ - **mtv:video**
+ - **mtvjapan**
+ - **mtvservices:embedded**
+ - **MTVUutisetArticle**
+ - **MuenchenTV**: münchen.tv
+ - **mva**: Microsoft Virtual Academy videos
+ - **mva:course**: Microsoft Virtual Academy courses
+ - **Mwave**
+ - **MwaveMeetGreet**
+ - **MyChannels**
+ - **MySpace**
+ - **MySpace:album**
+ - **MySpass**
+ - **Myvi**
+ - **MyVidster**
+ - **MyviEmbed**
+ - **MyVisionTV**
+ - **n-tv.de**
+ - **natgeo:video**
+ - **NationalGeographicTV**
+ - **Naver**
+ - **NBA**
+ - **nba:watch**
+ - **nba:watch:collection**
+ - **NBAChannel**
+ - **NBAEmbed**
+ - **NBAWatchEmbed**
+ - **NBC**
+ - **NBCNews**
+ - **nbcolympics**
+ - **nbcolympics:stream**
+ - **NBCSports**
+ - **NBCSportsStream**
+ - **NBCSportsVPlayer**
+ - **ndr**: NDR.de - Norddeutscher Rundfunk
+ - **ndr:embed**
+ - **ndr:embed:base**
+ - **NDTV**
+ - **NerdCubedFeed**
+ - **netease:album**: 网易云音乐 - 专辑
+ - **netease:djradio**: 网易云音乐 - 电台
+ - **netease:mv**: 网易云音乐 - MV
+ - **netease:playlist**: 网易云音乐 - 歌单
+ - **netease:program**: 网易云音乐 - 电台节目
+ - **netease:singer**: 网易云音乐 - 歌手
+ - **netease:song**: 网易云音乐
+ - **NetPlus**
+ - **Netzkino**
+ - **Newgrounds**
+ - **NewgroundsPlaylist**
+ - **Newstube**
+ - **NextMedia**: 蘋果日報
+ - **NextMediaActionNews**: 蘋果日報 - 動新聞
+ - **NextTV**: 壹電視
+ - **Nexx**
+ - **NexxEmbed**
+ - **nfl.com** (Currently broken)
+ - **nfl.com:article** (Currently broken)
+ - **NhkVod**
+ - **NhkVodProgram**
+ - **nhl.com**
+ - **nick.com**
+ - **nick.de**
+ - **nickelodeon:br**
+ - **nickelodeonru**
+ - **nicknight**
+ - **niconico**: ニコニコ動画
+ - **NiconicoPlaylist**
+ - **Nintendo**
+ - **njoy**: N-JOY
+ - **njoy:embed**
+ - **NJPWWorld**: 新日本プロレスワールド
+ - **NobelPrize**
+ - **NonkTube**
+ - **Noovo**
+ - **Normalboots**
+ - **NosVideo**
+ - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
+ - **NovaEmbed**
+ - **nowness**
+ - **nowness:playlist**
+ - **nowness:series**
+ - **Noz**
+ - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
+ - **npo.nl:live**
+ - **npo.nl:radio**
+ - **npo.nl:radio:fragment**
+ - **Npr**
+ - **NRK**
+ - **NRKPlaylist**
+ - **NRKRadioPodkast**
+ - **NRKSkole**: NRK Skole
+ - **NRKTV**: NRK TV and NRK Radio
+ - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
+ - **NRKTVEpisode**
+ - **NRKTVEpisodes**
+ - **NRKTVSeason**
+ - **NRKTVSeries**
+ - **NRLTV**
+ - **ntv.ru**
+ - **Nuvid**
+ - **NYTimes**
+ - **NYTimesArticle**
+ - **NYTimesCooking**
+ - **NZZ**
+ - **ocw.mit.edu**
+ - **OdaTV**
+ - **Odnoklassniki**
+ - **OktoberfestTV**
+ - **OnDemandKorea**
+ - **onet.pl**
+ - **onet.tv**
+ - **onet.tv:channel**
+ - **OnetMVP**
+ - **OnionStudios**
+ - **Ooyala**
+ - **OoyalaExternal**
+ - **OraTV**
+ - **orf:burgenland**: Radio Burgenland
+ - **orf:fm4**: radio FM4
+ - **orf:fm4:story**: fm4.orf.at stories
+ - **orf:iptv**: iptv.ORF.at
+ - **orf:kaernten**: Radio Kärnten
+ - **orf:noe**: Radio Niederösterreich
+ - **orf:oberoesterreich**: Radio Oberösterreich
+ - **orf:oe1**: Radio Österreich 1
+ - **orf:oe3**: Radio Österreich 3
+ - **orf:salzburg**: Radio Salzburg
+ - **orf:steiermark**: Radio Steiermark
+ - **orf:tirol**: Radio Tirol
+ - **orf:tvthek**: ORF TVthek
+ - **orf:vorarlberg**: Radio Vorarlberg
+ - **orf:wien**: Radio Wien
+ - **OsnatelTV**
+ - **OutsideTV**
+ - **PacktPub**
+ - **PacktPubCourse**
+ - **PalcoMP3:artist**
+ - **PalcoMP3:song**
+ - **PalcoMP3:video**
+ - **pandora.tv**: 판도라TV
+ - **ParamountNetwork**
+ - **parliamentlive.tv**: UK parliament videos
+ - **Patreon**
+ - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
+ - **PearVideo**
+ - **PeerTube**
+ - **People**
+ - **PerformGroup**
+ - **periscope**: Periscope
+ - **periscope:user**: Periscope user videos
+ - **PhilharmonieDeParis**: Philharmonie de Paris
+ - **phoenix.de**
+ - **Photobucket**
+ - **Picarto**
+ - **PicartoVod**
+ - **Piksel**
+ - **Pinkbike**
+ - **Pinterest**
+ - **PinterestCollection**
+ - **Pladform**
+ - **Platzi**
+ - **PlatziCourse**
+ - **play.fm**
+ - **player.sky.it**
+ - **PlayPlusTV**
+ - **PlayStuff**
+ - **PlaysTV**
+ - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
+ - **Playvid**
+ - **Playwire**
+ - **pluralsight**
+ - **pluralsight:course**
+ - **podomatic**
+ - **Pokemon**
+ - **PolskieRadio**
+ - **PolskieRadioCategory**
+ - **Popcorntimes**
+ - **PopcornTV**
+ - **PornCom**
+ - **PornerBros**
+ - **PornHd**
+ - **PornHub**: PornHub and Thumbzilla
+ - **PornHubPagedVideoList**
+ - **PornHubUser**
+ - **PornHubUserVideosUpload**
+ - **Pornotube**
+ - **PornoVoisines**
+ - **PornoXO**
+ - **PornTube**
+ - **PressTV**
+ - **prosiebensat1**: ProSiebenSat.1 Digital
+ - **puhutv**
+ - **puhutv:serie**
+ - **Puls4**
+ - **Pyvideo**
+ - **qqmusic**: QQ音乐
+ - **qqmusic:album**: QQ音乐 - 专辑
+ - **qqmusic:playlist**: QQ音乐 - 歌单
+ - **qqmusic:singer**: QQ音乐 - 歌手
+ - **qqmusic:toplist**: QQ音乐 - 排行榜
+ - **QuantumTV**
+ - **Qub**
+ - **Quickline**
+ - **QuicklineLive**
+ - **R7**
+ - **R7Article**
+ - **radio.de**
+ - **radiobremen**
+ - **radiocanada**
+ - **radiocanada:audiovideo**
+ - **radiofrance**
+ - **RadioJavan**
+ - **Rai**
+ - **RaiPlay**
+ - **RaiPlayLive**
+ - **RaiPlayPlaylist**
+ - **RayWenderlich**
+ - **RayWenderlichCourse**
+ - **RBMARadio**
+ - **RDS**: RDS.ca
+ - **RedBull**
+ - **RedBullEmbed**
+ - **RedBullTV**
+ - **RedBullTVRrnContent**
+ - **Reddit**
+ - **RedditR**
+ - **RedTube**
+ - **RegioTV**
+ - **RENTV**
+ - **RENTVArticle**
+ - **Restudy**
+ - **Reuters**
+ - **ReverbNation**
+ - **RICE**
+ - **RMCDecouverte**
+ - **RockstarGames**
+ - **RoosterTeeth**
+ - **RottenTomatoes**
+ - **Roxwel**
+ - **Rozhlas**
+ - **RTBF**
+ - **rte**: Raidió Teilifís Éireann TV
+ - **rte:radio**: Raidió Teilifís Éireann radio
+ - **rtl.nl**: rtl.nl and rtlxl.nl
+ - **rtl2**
+ - **rtl2:you**
+ - **rtl2:you:series**
+ - **RTP**
+ - **RTS**: RTS.ch
+ - **rtve.es:alacarta**: RTVE a la carta
+ - **rtve.es:infantil**: RTVE infantil
+ - **rtve.es:live**: RTVE.es live streams
+ - **rtve.es:television**
+ - **RTVNH**
+ - **RTVS**
+ - **RUHD**
+ - **RumbleEmbed**
+ - **rutube**: Rutube videos
+ - **rutube:channel**: Rutube channels
+ - **rutube:embed**: Rutube embedded videos
+ - **rutube:movie**: Rutube movies
+ - **rutube:person**: Rutube person videos
+ - **rutube:playlist**: Rutube playlists
+ - **RUTV**: RUTV.RU
+ - **Ruutu**
+ - **Ruv**
+ - **safari**: safaribooksonline.com online video
+ - **safari:api**
+ - **safari:course**: safaribooksonline.com online courses
+ - **SAKTV**
+ - **SaltTV**
+ - **SampleFocus**
+ - **Sapo**: SAPO Vídeos
+ - **savefrom.net**
+ - **SBS**: sbs.com.au
+ - **schooltv**
+ - **screen.yahoo:search**: Yahoo screen search
+ - **Screencast**
+ - **ScreencastOMatic**
+ - **ScrippsNetworks**
+ - **scrippsnetworks:watch**
+ - **SCTE**
+ - **SCTECourse**
+ - **Seeker**
+ - **SenateISVP**
+ - **SendtoNews**
+ - **Servus**
+ - **Sexu**
+ - **SeznamZpravy**
+ - **SeznamZpravyArticle**
+ - **Shahid**
+ - **ShahidShow**
+ - **Shared**: shared.sx
+ - **ShowRoomLive**
+ - **simplecast**
+ - **simplecast:episode**
+ - **simplecast:podcast**
+ - **Sina**
+ - **sky.it**
+ - **sky:news**
+ - **sky:sports**
+ - **sky:sports:news**
+ - **skyacademy.it**
+ - **SkylineWebcams**
+ - **skynewsarabia:article**
+ - **skynewsarabia:video**
+ - **Slideshare**
+ - **SlidesLive**
+ - **Slutload**
+ - **Snotr**
+ - **Sohu**
+ - **SonyLIV**
+ - **soundcloud**
+ - **soundcloud:playlist**
+ - **soundcloud:search**: Soundcloud search
+ - **soundcloud:set**
+ - **soundcloud:trackstation**
+ - **soundcloud:user**
+ - **SoundcloudEmbed**
+ - **soundgasm**
+ - **soundgasm:profile**
+ - **southpark.cc.com**
+ - **southpark.cc.com:español**
+ - **southpark.de**
+ - **southpark.nl**
+ - **southparkstudios.dk**
+ - **SpankBang**
+ - **SpankBangPlaylist**
+ - **Spankwire**
+ - **Spiegel**
+ - **sport.francetvinfo.fr**
+ - **Sport5**
+ - **SportBox**
+ - **SportDeutschland**
+ - **spotify**
+ - **spotify:show**
+ - **Spreaker**
+ - **SpreakerPage**
+ - **SpreakerShow**
+ - **SpreakerShowPage**
+ - **SpringboardPlatform**
+ - **Sprout**
+ - **sr:mediathek**: Saarländischer Rundfunk
+ - **SRGSSR**
+ - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
+ - **stanfordoc**: Stanford Open ClassRoom
+ - **Steam**
+ - **Stitcher**
+ - **StitcherShow**
+ - **StoryFire**
+ - **StoryFireSeries**
+ - **StoryFireUser**
+ - **Streamable**
+ - **streamcloud.eu**
+ - **StreamCZ**
+ - **StreetVoice**
+ - **StretchInternet**
+ - **stv:player**
+ - **SunPorno**
+ - **sverigesradio:episode**
+ - **sverigesradio:publication**
+ - **SVT**
+ - **SVTPage**
+ - **SVTPlay**: SVT Play and Öppet arkiv
+ - **SVTSeries**
+ - **SWRMediathek**
+ - **Syfy**
+ - **SztvHu**
+ - **t-online.de**
+ - **Tagesschau**
+ - **tagesschau:player**
+ - **Tass**
+ - **TBS**
+ - **TDSLifeway**
+ - **Teachable**
+ - **TeachableCourse**
+ - **teachertube**: teachertube.com videos
+ - **teachertube:user:collection**: teachertube.com user and collection videos
+ - **TeachingChannel**
+ - **Teamcoco**
+ - **TeamTreeHouse**
+ - **TechTalks**
+ - **techtv.mit.edu**
+ - **ted**
+ - **Tele13**
+ - **Tele5**
+ - **TeleBruxelles**
+ - **Telecinco**: telecinco.es, cuatro.com and mediaset.es
+ - **Telegraaf**
+ - **TeleMB**
+ - **TeleQuebec**
+ - **TeleQuebecEmission**
+ - **TeleQuebecLive**
+ - **TeleQuebecSquat**
+ - **TeleQuebecVideo**
+ - **TeleTask**
+ - **Telewebion**
+ - **TennisTV**
+ - **TenPlay**
+ - **TF1**
+ - **TFO**
+ - **TheIntercept**
+ - **ThePlatform**
+ - **ThePlatformFeed**
+ - **TheScene**
+ - **TheStar**
+ - **TheSun**
+ - **TheWeatherChannel**
+ - **ThisAmericanLife**
+ - **ThisAV**
+ - **ThisOldHouse**
+ - **TikTok**
+ - **TikTokUser** (Currently broken)
+ - **tinypic**: tinypic.com videos
+ - **TMZ**
+ - **TMZArticle**
+ - **TNAFlix**
+ - **TNAFlixNetworkEmbed**
+ - **toggle**
+ - **ToonGoggles**
+ - **tou.tv**
+ - **Toypics**: Toypics video
+ - **ToypicsUser**: Toypics user profile
+ - **TrailerAddict** (Currently broken)
+ - **Trilulilu**
+ - **Trovo**
+ - **TrovoVod**
+ - **TruNews**
+ - **TruTV**
+ - **Tube8**
+ - **TubiTv**
+ - **Tumblr**
+ - **tunein:clip**
+ - **tunein:program**
+ - **tunein:station**
+ - **tunein:topic**
+ - **TunePk**
+ - **Turbo**
+ - **tv.dfb.de**
+ - **TV2**
+ - **tv2.hu**
+ - **TV2Article**
+ - **TV2DK**
+ - **TV2DKBornholmPlay**
+ - **TV4**: tv4.se and tv4play.se
+ - **TV5MondePlus**: TV5MONDE+
+ - **tv5unis**
+ - **tv5unis:video**
+ - **tv8.it**
+ - **TVA**
+ - **TVANouvelles**
+ - **TVANouvellesArticle**
+ - **TVC**
+ - **TVCArticle**
+ - **TVer**
+ - **tvigle**: Интернет-телевидение Tvigle.ru
+ - **tvland.com**
+ - **TVN24**
+ - **TVNet**
+ - **TVNoe**
+ - **TVNow**
+ - **TVNowAnnual**
+ - **TVNowNew**
+ - **TVNowSeason**
+ - **TVNowShow**
+ - **tvp**: Telewizja Polska
+ - **tvp:embed**: Telewizja Polska
+ - **tvp:series**
+ - **TVPlayer**
+ - **TVPlayHome**
+ - **Tweakers**
+ - **TwitCasting**
+ - **twitch:clips**
+ - **twitch:stream**
+ - **twitch:vod**
+ - **TwitchCollection**
+ - **TwitchVideos**
+ - **TwitchVideosClips**
+ - **TwitchVideosCollections**
+ - **twitter**
+ - **twitter:amplify**
+ - **twitter:broadcast**
+ - **twitter:card**
+ - **udemy**
+ - **udemy:course**
+ - **UDNEmbed**: 聯合影音
+ - **UFCArabia**
+ - **UFCTV**
+ - **UKTVPlay**
+ - **umg:de**: Universal Music Deutschland
+ - **Unistra**
+ - **Unity**
+ - **uol.com.br**
+ - **uplynk**
+ - **uplynk:preplay**
+ - **Urort**: NRK P3 Urørt
+ - **URPlay**
+ - **USANetwork**
+ - **USAToday**
+ - **ustream**
+ - **ustream:channel**
+ - **ustudio**
+ - **ustudio:embed**
+ - **Varzesh3**
+ - **Vbox7**
+ - **VeeHD**
+ - **Veoh**
+ - **Vesti**: Вести.Ru
+ - **Vevo**
+ - **VevoPlaylist**
+ - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
+ - **vh1.com**
+ - **vhx:embed**
+ - **Viafree**
+ - **vice**
+ - **vice:article**
+ - **vice:show**
+ - **Vidbit**
+ - **Viddler**
+ - **Videa**
+ - **video.arnes.si**: Arnes Video
+ - **video.google:search**: Google Video search
+ - **video.sky.it**
+ - **video.sky.it:live**
+ - **VideoDetective**
+ - **videofy.me**
+ - **videomore**
+ - **videomore:season**
+ - **videomore:video**
+ - **VideoPress**
+ - **Vidio**
+ - **VidLii**
+ - **vidme**
+ - **vidme:user**
+ - **vidme:user:likes**
+ - **vier**: vier.be and vijf.be
+ - **vier:videos**
+ - **viewlift**
+ - **viewlift:embed**
+ - **Viidea**
+ - **viki**
+ - **viki:channel**
+ - **vimeo**
+ - **vimeo:album**
+ - **vimeo:channel**
+ - **vimeo:group**
+ - **vimeo:likes**: Vimeo user likes
+ - **vimeo:ondemand**
+ - **vimeo:review**: Review pages on vimeo
+ - **vimeo:user**
+ - **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
+ - **Vimple**: Vimple - one-click video hosting
+ - **Vine**
+ - **vine:user**
+ - **Viqeo**
+ - **Viu**
+ - **viu:ott**
+ - **viu:playlist**
+ - **Vivo**: vivo.sx
+ - **vk**: VK
+ - **vk:uservideos**: VK - User's Videos
+ - **vk:wallpost**
+ - **vlive**
+ - **vlive:channel**
+ - **vlive:post**
+ - **Vodlocker**
+ - **VODPl**
+ - **VODPlatform**
+ - **VoiceRepublic**
+ - **Voot**
+ - **VoxMedia**
+ - **VoxMediaVolume**
+ - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
+ - **Vrak**
+ - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
+ - **VrtNU**: VrtNU.be
+ - **vrv**
+ - **vrv:series**
+ - **VShare**
+ - **VTM**
+ - **VTXTV**
+ - **vube**: Vube.com
+ - **VuClip**
+ - **VVVVID**
+ - **VVVVIDShow**
+ - **VyboryMos**
+ - **Vzaar**
+ - **Wakanim**
+ - **Walla**
+ - **WalyTV**
+ - **washingtonpost**
+ - **washingtonpost:article**
+ - **wat.tv**
+ - **WatchBox**
+ - **WatchIndianPorn**: Watch Indian Porn
+ - **WDR**
+ - **wdr:mobile**
+ - **WDRElefant**
+ - **WDRPage**
+ - **Webcaster**
+ - **WebcasterFeed**
+ - **WebOfStories**
+ - **WebOfStoriesPlaylist**
+ - **Weibo**
+ - **WeiboMobile**
+ - **WeiqiTV**: WQTV
+ - **Wistia**
+ - **WistiaPlaylist**
+ - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
+ - **WorldStarHipHop**
+ - **WSJ**: Wall Street Journal
+ - **WSJArticle**
+ - **WWE**
+ - **XBef**
+ - **XboxClips**
+ - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing
+ - **XHamster**
+ - **XHamsterEmbed**
+ - **XHamsterUser**
+ - **xiami:album**: 虾米音乐 - 专辑
+ - **xiami:artist**: 虾米音乐 - 歌手
+ - **xiami:collection**: 虾米音乐 - 精选集
+ - **xiami:song**: 虾米音乐
+ - **ximalaya**: 喜马拉雅FM
+ - **ximalaya:album**: 喜马拉雅FM 专辑
+ - **XMinus**
+ - **XNXX**
+ - **Xstream**
+ - **XTube**
+ - **XTubeUser**: XTube user profile
+ - **Xuite**: 隨意窩Xuite影音
+ - **XVideos**
+ - **XXXYMovies**
+ - **Yahoo**: Yahoo screen and movies
+ - **yahoo:gyao**
+ - **yahoo:gyao:player**
+ - **yahoo:japannews**: Yahoo! Japan News
+ - **YandexDisk**
+ - **yandexmusic:album**: Яндекс.Музыка - Альбом
+ - **yandexmusic:artist:albums**: Яндекс.Музыка - Артист - Альбомы
+ - **yandexmusic:artist:tracks**: Яндекс.Музыка - Артист - Треки
+ - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
+ - **yandexmusic:track**: Яндекс.Музыка - Трек
+ - **YandexVideo**
+ - **YapFiles**
+ - **YesJapan**
+ - **yinyuetai:video**: 音悦Tai
+ - **Ynet**
+ - **YouJizz**
+ - **youku**: 优酷
+ - **youku:show**
+ - **YouNowChannel**
+ - **YouNowLive**
+ - **YouNowMoment**
+ - **YouPorn**
+ - **YourPorn**
+ - **YourUpload**
+ - **youtube**: YouTube.com
+ - **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
+ - **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
+ - **youtube:playlist**: YouTube.com playlists
+ - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
+ - **youtube:search**: YouTube.com searches
+ - **youtube:search:date**: YouTube.com searches, newest videos first
+ - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
+ - **youtube:tab**: YouTube.com tab
+ - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+ - **YoutubeYtBe**
+ - **YoutubeYtUser**
+ - **Zapiks**
+ - **Zattoo**
+ - **ZattooLive**
+ - **ZDF**
+ - **ZDFChannel**
+ - **Zhihu**
+ - **zingmp3**: mp3.zing.vn
+ - **zingmp3:album**
+ - **zoom**
+ - **Zype**
diff --git a/hypervideo.plugin.zsh b/hypervideo.plugin.zsh
new file mode 100644
index 0000000..a5ee9ec
--- /dev/null
+++ b/hypervideo.plugin.zsh
@@ -0,0 +1,24 @@
+# This allows the hypervideo command to be installed in ZSH using antigen.
+# Antigen is a bundle manager. It allows you to enhance the functionality of
+# your zsh session by installing bundles and themes easily.
+
+# Antigen documentation:
+# http://antigen.sharats.me/
+# https://github.com/zsh-users/antigen
+
+# Install hypervideo:
+# antigen bundle ytdl-org/hypervideo
+# Bundles installed by antigen are available for use immediately.
+
+# Update hypervideo (and all other antigen bundles):
+# antigen update
+
+# The antigen command will download the git repository to a folder and then
+# execute an enabling script (this file). The complete process for loading the
+# code is documented here:
+# https://github.com/zsh-users/antigen#notes-on-writing-plugins
+
+# This specific script just aliases hypervideo to the python script that this
+# library provides. This requires updating the PYTHONPATH to ensure that the
+# full set of code can be located.
+alias hypervideo="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/hypervideo"
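+
+# For a bundle checked out at, say, ~/.antigen/bundles/hypervideo (a
+# hypothetical path; antigen decides the real location), the alias would
+# expand to roughly:
+#   PYTHONPATH=~/.antigen/bundles/hypervideo ~/.antigen/bundles/hypervideo/bin/hypervideo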
diff --git a/hypervideo_dl/YoutubeDL.py b/hypervideo_dl/YoutubeDL.py
new file mode 100755
index 0000000..d8621ed
--- /dev/null
+++ b/hypervideo_dl/YoutubeDL.py
@@ -0,0 +1,2469 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import absolute_import, unicode_literals
+
+import collections
+import contextlib
+import copy
+import datetime
+import errno
+import fileinput
+import io
+import itertools
+import json
+import locale
+import operator
+import os
+import platform
+import re
+import shutil
+import subprocess
+import socket
+import sys
+import time
+import tokenize
+import traceback
+import random
+
+from string import ascii_letters
+
+from .compat import (
+ compat_basestring,
+ compat_cookiejar,
+ compat_get_terminal_size,
+ compat_http_client,
+ compat_kwargs,
+ compat_numeric_types,
+ compat_os_name,
+ compat_str,
+ compat_tokenize_tokenize,
+ compat_urllib_error,
+ compat_urllib_request,
+ compat_urllib_request_DataHandler,
+)
+from .utils import (
+ age_restricted,
+ args_to_str,
+ ContentTooShortError,
+ date_from_str,
+ DateRange,
+ DEFAULT_OUTTMPL,
+ determine_ext,
+ determine_protocol,
+ DownloadError,
+ encode_compat_str,
+ encodeFilename,
+ error_to_compat_str,
+ expand_path,
+ ExtractorError,
+ format_bytes,
+ formatSeconds,
+ GeoRestrictedError,
+ int_or_none,
+ ISO3166Utils,
+ locked_file,
+ make_HTTPS_handler,
+ MaxDownloadsReached,
+ orderedSet,
+ PagedList,
+ parse_filesize,
+ PerRequestProxyHandler,
+ platform_name,
+ PostProcessingError,
+ preferredencoding,
+ prepend_extension,
+ register_socks_protocols,
+ render_table,
+ replace_extension,
+ SameFileError,
+ sanitize_filename,
+ sanitize_path,
+ sanitize_url,
+ sanitized_Request,
+ std_headers,
+ str_or_none,
+ subtitles_filename,
+ UnavailableVideoError,
+ url_basename,
+ version_tuple,
+ write_json_file,
+ write_string,
+ YoutubeDLCookieJar,
+ YoutubeDLCookieProcessor,
+ YoutubeDLHandler,
+ YoutubeDLRedirectHandler,
+)
+from .cache import Cache
+from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
+from .extractor.openload import PhantomJSwrapper
+from .downloader import get_suitable_downloader
+from .downloader.rtmp import rtmpdump_version
+from .postprocessor import (
+ FFmpegFixupM3u8PP,
+ FFmpegFixupM4aPP,
+ FFmpegFixupStretchedPP,
+ FFmpegMergerPP,
+ FFmpegPostProcessor,
+ get_postprocessor,
+)
+from .version import __version__
+
+if compat_os_name == 'nt':
+ import ctypes
+
+
+class YoutubeDL(object):
+ """YoutubeDL class.
+
+    YoutubeDL objects are the ones responsible for downloading the
+    actual video file and writing it to disk if the user has requested
+    it, among some other tasks. In most cases there should be one per
+    program. Given a video URL, the downloader doesn't know how to
+    extract all the needed information itself (that is the task of the
+    InfoExtractors), so it has to pass the URL to one of them.
+
+    For this, YoutubeDL objects have a method that allows
+    InfoExtractors to be registered in a given order. When it is passed
+    a URL, the YoutubeDL object hands it to the first InfoExtractor it
+    finds that reports being able to handle it. The InfoExtractor extracts
+    all the information about the video or videos the URL refers to, and
+    YoutubeDL processes the extracted information, possibly using a File
+    Downloader to download the video.
+
+ YoutubeDL objects accept a lot of parameters. In order not to saturate
+ the object constructor with arguments, it receives a dictionary of
+ options instead. These options are available through the params
+ attribute for the InfoExtractors to use. The YoutubeDL also
+ registers itself as the downloader in charge for the InfoExtractors
+ that are added to it, so this is a "mutual registration".
+
+ Available options:
+
+ username: Username for authentication purposes.
+ password: Password for authentication purposes.
+ videopassword: Password for accessing a video.
+ ap_mso: Adobe Pass multiple-system operator identifier.
+ ap_username: Multiple-system operator account username.
+ ap_password: Multiple-system operator account password.
+ usenetrc: Use netrc for authentication instead.
+ verbose: Print additional info to stdout.
+ quiet: Do not print messages to stdout.
+ no_warnings: Do not print out anything for warnings.
+ forceurl: Force printing final URL.
+ forcetitle: Force printing title.
+ forceid: Force printing ID.
+ forcethumbnail: Force printing thumbnail URL.
+ forcedescription: Force printing description.
+ forcefilename: Force printing final filename.
+ forceduration: Force printing duration.
+ forcejson: Force printing info_dict as JSON.
+ dump_single_json: Force printing the info_dict of the whole playlist
+ (or video) as a single JSON line.
+ simulate: Do not download the video files.
+ format: Video format code. See options.py for more information.
+ outtmpl: Template for output names.
+ outtmpl_na_placeholder: Placeholder for unavailable meta fields.
+ restrictfilenames: Do not allow "&" and spaces in file names
+ ignoreerrors: Do not stop on download errors.
+ force_generic_extractor: Force downloader to use the generic extractor
+ nooverwrites: Prevent overwriting files.
+ playliststart: Playlist item to start at.
+ playlistend: Playlist item to end at.
+ playlist_items: Specific indices of playlist to download.
+ playlistreverse: Download playlist items in reverse order.
+ playlistrandom: Download playlist items in random order.
+ matchtitle: Download only matching titles.
+ rejecttitle: Reject downloads for matching titles.
+ logger: Log messages to a logging.Logger instance.
+ logtostderr: Log messages to stderr instead of stdout.
+ writedescription: Write the video description to a .description file
+ writeinfojson: Write the video description to a .info.json file
+ writeannotations: Write the video annotations to a .annotations.xml file
+ writethumbnail: Write the thumbnail image to a file
+ write_all_thumbnails: Write all thumbnail formats to files
+ writesubtitles: Write the video subtitles to a file
+ writeautomaticsub: Write the automatically generated subtitles to a file
+ allsubtitles: Downloads all the subtitles of the video
+ (requires writesubtitles or writeautomaticsub)
+ listsubtitles: Lists all available subtitles for the video
+ subtitlesformat: The format code for subtitles
+ subtitleslangs: List of languages of the subtitles to download
+ keepvideo: Keep the video file after post-processing
+ daterange: A DateRange object, download only if the upload_date is in the range.
+ skip_download: Skip the actual download of the video file
+ cachedir: Location of the cache files in the filesystem.
+ False to disable filesystem cache.
+ noplaylist: Download single video instead of a playlist if in doubt.
+ age_limit: An integer representing the user's age in years.
+ Unsuitable videos for the given age are skipped.
+ min_views: An integer representing the minimum view count the video
+ must have in order to not be skipped.
+ Videos without view count information are always
+ downloaded. None for no limit.
+ max_views: An integer representing the maximum view count.
+ Videos that are more popular than that are not
+ downloaded.
+ Videos without view count information are always
+ downloaded. None for no limit.
+ download_archive: File name of a file where all downloads are recorded.
+ Videos already present in the file are not downloaded
+ again.
+ cookiefile: File name where cookies should be read from and dumped to.
+ nocheckcertificate:Do not verify SSL certificates
+ prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
+ At the moment, this is only supported by YouTube.
+ proxy: URL of the proxy server to use
+ geo_verification_proxy: URL of the proxy to use for IP address verification
+ on geo-restricted sites.
+ socket_timeout: Time to wait for unresponsive hosts, in seconds
+ bidi_workaround: Work around buggy terminals without bidirectional text
+                       support, using fribidi
+ debug_printtraffic:Print out sent and received HTTP traffic
+ include_ads: Download ads as well
+ default_search: Prepend this string if an input url is not valid.
+ 'auto' for elaborate guessing
+ encoding: Use this encoding instead of the system-specified.
+ extract_flat: Do not resolve URLs, return the immediate result.
+ Pass in 'in_playlist' to only show this behavior for
+ playlist items.
+ postprocessors: A list of dictionaries, each with an entry
+ * key: The name of the postprocessor. See
+ hypervideo_dl/postprocessor/__init__.py for a list.
+ as well as any further keyword arguments for the
+ postprocessor.
+ progress_hooks: A list of functions that get called on download
+ progress, with a dictionary with the entries
+ * status: One of "downloading", "error", or "finished".
+ Check this first and ignore unknown values.
+
+ If status is one of "downloading", or "finished", the
+ following properties may also be present:
+ * filename: The final filename (always present)
+ * tmpfilename: The filename we're currently writing to
+ * downloaded_bytes: Bytes on disk
+ * total_bytes: Size of the whole file, None if unknown
+ * total_bytes_estimate: Guess of the eventual file size,
+ None if unavailable.
+ * elapsed: The number of seconds since download started.
+ * eta: The estimated time in seconds, None if unknown
+ * speed: The download speed in bytes/second, None if
+ unknown
+ * fragment_index: The counter of the currently
+ downloaded video fragment.
+ * fragment_count: The number of fragments (= individual
+ files that will be merged)
+
+                       Progress hooks are guaranteed to be called at least once
+                       (with status "finished") if the download is successful;
+                       a commented sketch of a simple hook follows this docstring.
+ merge_output_format: Extension to use when merging formats.
+ fixup: Automatically correct known faults of the file.
+ One of:
+ - "never": do nothing
+ - "warn": only emit a warning
+ - "detect_or_warn": check whether we can do anything
+ about it, warn otherwise (default)
+ source_address: Client-side IP address to bind to.
+ call_home: Boolean, true iff we are allowed to contact the
+ hypervideo servers for debugging.
+ sleep_interval: Number of seconds to sleep before each download when
+ used alone or a lower bound of a range for randomized
+ sleep before each download (minimum possible number
+ of seconds to sleep) when used along with
+ max_sleep_interval.
+ max_sleep_interval:Upper bound of a range for randomized sleep before each
+ download (maximum possible number of seconds to sleep).
+ Must only be used along with sleep_interval.
+ Actual sleep time will be a random float from range
+ [sleep_interval; max_sleep_interval].
+ listformats: Print an overview of available video formats and exit.
+ list_thumbnails: Print a table of all thumbnails and exit.
+ match_filter: A function that gets called with the info_dict of
+ every video.
+ If it returns a message, the video is ignored.
+ If it returns None, the video is downloaded.
+ match_filter_func in utils.py is one example for this.
+ no_color: Do not emit color codes in output.
+ geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
+ HTTP header
+ geo_bypass_country:
+ Two-letter ISO 3166-2 country code that will be used for
+ explicit geographic restriction bypassing via faking
+ X-Forwarded-For HTTP header
+ geo_bypass_ip_block:
+ IP range in CIDR notation that will be used similarly to
+ geo_bypass_country
+
+ The following options determine which downloader is picked:
+ external_downloader: Executable of the external downloader to call.
+ None or unset for standard (built-in) downloader.
+    hls_prefer_native: Use the native HLS downloader if True, use
+                       ffmpeg/avconv if False, or use the downloader
+                       suggested by the extractor if None.
+
+ The following parameters are not used by YoutubeDL itself, they are used by
+ the downloader (see hypervideo_dl/downloader/common.py):
+ nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
+ noresizebuffer, retries, continuedl, noprogress, consoletitle,
+ xattr_set_filesize, external_downloader_args, hls_use_mpegts,
+ http_chunk_size.
+
+ The following options are used by the post processors:
+ prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
+ otherwise prefer ffmpeg.
+ ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
+ to the binary or its containing directory.
+ postprocessor_args: A list of additional command-line arguments for the
+ postprocessor.
+
+ The following options are used by the Youtube extractor:
+ youtube_include_dash_manifest: If True (default), DASH manifests and related
+ data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't
+ care about DASH.
+ """
+
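+    # A minimal usage sketch (illustrative only: the URL is a placeholder,
+    # the options shown are a small subset of those documented above, and
+    # download() is defined further down in this class):
+    #
+    #     def my_hook(d):
+    #         if d['status'] == 'finished':
+    #             print('Done downloading %s' % d['filename'])
+    #
+    #     ydl_opts = {
+    #         'format': 'bestvideo+bestaudio/best',
+    #         'progress_hooks': [my_hook],
+    #     }
+    #     with YoutubeDL(ydl_opts) as ydl:
+    #         ydl.download(['https://example.com/watch?v=XXXX'])
+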
+ _NUMERIC_FIELDS = set((
+ 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
+ 'timestamp', 'upload_year', 'upload_month', 'upload_day',
+ 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
+ 'average_rating', 'comment_count', 'age_limit',
+ 'start_time', 'end_time',
+ 'chapter_number', 'season_number', 'episode_number',
+ 'track_number', 'disc_number', 'release_year',
+ 'playlist_index',
+ ))
+
+ params = None
+ _ies = []
+ _pps = []
+ _download_retcode = None
+ _num_downloads = None
+ _playlist_level = 0
+ _playlist_urls = set()
+ _screen_file = None
+
+ def __init__(self, params=None, auto_init=True):
+ """Create a FileDownloader object with the given options."""
+ if params is None:
+ params = {}
+ self._ies = []
+ self._ies_instances = {}
+ self._pps = []
+ self._progress_hooks = []
+ self._download_retcode = 0
+ self._num_downloads = 0
+ self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
+ self._err_file = sys.stderr
+ self.params = {
+ # Default parameters
+ 'nocheckcertificate': False,
+ }
+ self.params.update(params)
+ self.cache = Cache(self)
+
+ def check_deprecated(param, option, suggestion):
+ if self.params.get(param) is not None:
+ self.report_warning(
+ '%s is deprecated. Use %s instead.' % (option, suggestion))
+ return True
+ return False
+
+ if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
+ if self.params.get('geo_verification_proxy') is None:
+ self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
+
+        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N is the number of digits')
+ check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
+ check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
+
+ if params.get('bidi_workaround', False):
+ try:
+ import pty
+ master, slave = pty.openpty()
+ width = compat_get_terminal_size().columns
+ if width is None:
+ width_args = []
+ else:
+ width_args = ['-w', str(width)]
+ sp_kwargs = dict(
+ stdin=subprocess.PIPE,
+ stdout=slave,
+ stderr=self._err_file)
+ try:
+ self._output_process = subprocess.Popen(
+ ['bidiv'] + width_args, **sp_kwargs
+ )
+ except OSError:
+ self._output_process = subprocess.Popen(
+ ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
+ self._output_channel = os.fdopen(master, 'rb')
+ except OSError as ose:
+ if ose.errno == errno.ENOENT:
+                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
+ else:
+ raise
+
+ if (sys.platform != 'win32'
+ and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
+ and not params.get('restrictfilenames', False)):
+ # Unicode filesystem API will throw errors (#1474, #13027)
+ self.report_warning(
+ 'Assuming --restrict-filenames since file system encoding '
+ 'cannot encode all characters. '
+ 'Set the LC_ALL environment variable to fix this.')
+ self.params['restrictfilenames'] = True
+
+ if isinstance(params.get('outtmpl'), bytes):
+ self.report_warning(
+ 'Parameter outtmpl is bytes, but should be a unicode string. '
+ 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
+
+ self._setup_opener()
+
+ if auto_init:
+ self.print_debug_header()
+ self.add_default_info_extractors()
+
+ for pp_def_raw in self.params.get('postprocessors', []):
+ pp_class = get_postprocessor(pp_def_raw['key'])
+ pp_def = dict(pp_def_raw)
+ del pp_def['key']
+ pp = pp_class(self, **compat_kwargs(pp_def))
+ self.add_post_processor(pp)
+
+ for ph in self.params.get('progress_hooks', []):
+ self.add_progress_hook(ph)
+
+ register_socks_protocols()
+
+ def warn_if_short_id(self, argv):
+ # short YouTube ID starting with dash?
+ idxs = [
+ i for i, a in enumerate(argv)
+ if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
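+        # e.g. a bare argument '-abcdefghij' (a made-up 11-character video ID)
+        # would otherwise be parsed as options; the warning below suggests
+        # 'hypervideo -- -abcdefghij' instead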
+ if idxs:
+ correct_argv = (
+ ['hypervideo']
+ + [a for i, a in enumerate(argv) if i not in idxs]
+ + ['--'] + [argv[i] for i in idxs]
+ )
+ self.report_warning(
+ 'Long argument string detected. '
+ 'Use -- to separate parameters and URLs, like this:\n%s\n' %
+ args_to_str(correct_argv))
+
+ def add_info_extractor(self, ie):
+ """Add an InfoExtractor object to the end of the list."""
+ self._ies.append(ie)
+ if not isinstance(ie, type):
+ self._ies_instances[ie.ie_key()] = ie
+ ie.set_downloader(self)
+
+ def get_info_extractor(self, ie_key):
+ """
+        Get an instance of an IE with name ie_key. It will try to get one from
+        the _ies list; if there is no instance, it will create a new one and
+        add it to the extractor list.
+ """
+ ie = self._ies_instances.get(ie_key)
+ if ie is None:
+ ie = get_info_extractor(ie_key)()
+ self.add_info_extractor(ie)
+ return ie
+
+ def add_default_info_extractors(self):
+ """
+ Add the InfoExtractors returned by gen_extractors to the end of the list
+ """
+ for ie in gen_extractor_classes():
+ self.add_info_extractor(ie)
+
+ def add_post_processor(self, pp):
+ """Add a PostProcessor object to the end of the chain."""
+ self._pps.append(pp)
+ pp.set_downloader(self)
+
+ def add_progress_hook(self, ph):
+ """Add the progress hook (currently only for the file downloader)"""
+ self._progress_hooks.append(ph)
+
+ def _bidi_workaround(self, message):
+ if not hasattr(self, '_output_channel'):
+ return message
+
+ assert hasattr(self, '_output_process')
+ assert isinstance(message, compat_str)
+ line_count = message.count('\n') + 1
+ self._output_process.stdin.write((message + '\n').encode('utf-8'))
+ self._output_process.stdin.flush()
+ res = ''.join(self._output_channel.readline().decode('utf-8')
+ for _ in range(line_count))
+ return res[:-len('\n')]
+
+ def to_screen(self, message, skip_eol=False):
+ """Print message to stdout if not in quiet mode."""
+ return self.to_stdout(message, skip_eol, check_quiet=True)
+
+ def _write_string(self, s, out=None):
+ write_string(s, out=out, encoding=self.params.get('encoding'))
+
+ def to_stdout(self, message, skip_eol=False, check_quiet=False):
+ """Print message to stdout if not in quiet mode."""
+ if self.params.get('logger'):
+ self.params['logger'].debug(message)
+ elif not check_quiet or not self.params.get('quiet', False):
+ message = self._bidi_workaround(message)
+ terminator = ['\n', ''][skip_eol]
+ output = message + terminator
+
+ self._write_string(output, self._screen_file)
+
+ def to_stderr(self, message):
+ """Print message to stderr."""
+ assert isinstance(message, compat_str)
+ if self.params.get('logger'):
+ self.params['logger'].error(message)
+ else:
+ message = self._bidi_workaround(message)
+ output = message + '\n'
+ self._write_string(output, self._err_file)
+
+ def to_console_title(self, message):
+ if not self.params.get('consoletitle', False):
+ return
+ if compat_os_name == 'nt':
+ if ctypes.windll.kernel32.GetConsoleWindow():
+ # c_wchar_p() might not be necessary if `message` is
+ # already of type unicode()
+ ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
+ elif 'TERM' in os.environ:
+ self._write_string('\033]0;%s\007' % message, self._screen_file)
+
+ def save_console_title(self):
+ if not self.params.get('consoletitle', False):
+ return
+ if self.params.get('simulate', False):
+ return
+ if compat_os_name != 'nt' and 'TERM' in os.environ:
+ # Save the title on stack
+ self._write_string('\033[22;0t', self._screen_file)
+
+ def restore_console_title(self):
+ if not self.params.get('consoletitle', False):
+ return
+ if self.params.get('simulate', False):
+ return
+ if compat_os_name != 'nt' and 'TERM' in os.environ:
+ # Restore the title from stack
+ self._write_string('\033[23;0t', self._screen_file)
+
+ def __enter__(self):
+ self.save_console_title()
+ return self
+
+ def __exit__(self, *args):
+ self.restore_console_title()
+
+ if self.params.get('cookiefile') is not None:
+ self.cookiejar.save(ignore_discard=True, ignore_expires=True)
+
+ def trouble(self, message=None, tb=None):
+ """Determine action to take when a download problem appears.
+
+        Depending on whether the downloader has been configured to ignore
+        download errors or not, this method may raise an exception
+        when errors are found, after printing the message.
+
+ tb, if given, is additional traceback information.
+ """
+ if message is not None:
+ self.to_stderr(message)
+ if self.params.get('verbose'):
+ if tb is None:
+ if sys.exc_info()[0]: # if .trouble has been called from an except block
+ tb = ''
+ if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
+ tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
+ tb += encode_compat_str(traceback.format_exc())
+ else:
+ tb_data = traceback.format_list(traceback.extract_stack())
+ tb = ''.join(tb_data)
+ self.to_stderr(tb)
+ if not self.params.get('ignoreerrors', False):
+ if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
+ exc_info = sys.exc_info()[1].exc_info
+ else:
+ exc_info = sys.exc_info()
+ raise DownloadError(message, exc_info)
+ self._download_retcode = 1
+
+ def report_warning(self, message):
+ '''
+        Print the message to stderr; it will be prefixed with 'WARNING:'.
+        If stderr is a tty, the 'WARNING:' will be colored.
+ '''
+ if self.params.get('logger') is not None:
+ self.params['logger'].warning(message)
+ else:
+ if self.params.get('no_warnings'):
+ return
+ if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
+ _msg_header = '\033[0;33mWARNING:\033[0m'
+ else:
+ _msg_header = 'WARNING:'
+ warning_message = '%s %s' % (_msg_header, message)
+ self.to_stderr(warning_message)
+
+ def report_error(self, message, tb=None):
+ '''
+        Do the same as trouble, but prefix the message with 'ERROR:', colored
+        in red if stderr is a tty.
+ '''
+ if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
+ _msg_header = '\033[0;31mERROR:\033[0m'
+ else:
+ _msg_header = 'ERROR:'
+ error_message = '%s %s' % (_msg_header, message)
+ self.trouble(error_message, tb)
+
+ def report_file_already_downloaded(self, file_name):
+ """Report file has already been fully downloaded."""
+ try:
+ self.to_screen('[download] %s has already been downloaded' % file_name)
+ except UnicodeEncodeError:
+ self.to_screen('[download] The file has already been downloaded')
+
+ def prepare_filename(self, info_dict):
+ """Generate the output filename."""
+ try:
+ template_dict = dict(info_dict)
+
+ template_dict['epoch'] = int(time.time())
+ autonumber_size = self.params.get('autonumber_size')
+ if autonumber_size is None:
+ autonumber_size = 5
+ template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+ if template_dict.get('resolution') is None:
+ if template_dict.get('width') and template_dict.get('height'):
+ template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
+ elif template_dict.get('height'):
+ template_dict['resolution'] = '%sp' % template_dict['height']
+ elif template_dict.get('width'):
+ template_dict['resolution'] = '%dx?' % template_dict['width']
+
+ sanitize = lambda k, v: sanitize_filename(
+ compat_str(v),
+ restricted=self.params.get('restrictfilenames'),
+ is_id=(k == 'id' or k.endswith('_id')))
+ template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
+ for k, v in template_dict.items()
+ if v is not None and not isinstance(v, (list, tuple, dict)))
+ template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
+
+ outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+
+ # For fields playlist_index and autonumber convert all occurrences
+ # of %(field)s to %(field)0Nd for backward compatibility
+ field_size_compat_map = {
+ 'playlist_index': len(str(template_dict['n_entries'])),
+ 'autonumber': autonumber_size,
+ }
+ FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
+ mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
+ if mobj:
+ outtmpl = re.sub(
+ FIELD_SIZE_COMPAT_RE,
+ r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
+ outtmpl)
+
+ # Missing numeric fields used together with integer presentation types
+ # in format specification will break the argument substitution since
+ # string NA placeholder is returned for missing fields. We will patch
+ # output template for missing fields to meet string presentation type.
+ for numeric_field in self._NUMERIC_FIELDS:
+ if numeric_field not in template_dict:
+ # As of [1] format syntax is:
+ # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
+ # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
+ FORMAT_RE = r'''(?x)
+ (?<!%)
+ %
+ \({0}\) # mapping key
+ (?:[#0\-+ ]+)? # conversion flags (optional)
+ (?:\d+)? # minimum field width (optional)
+ (?:\.\d+)? # precision (optional)
+ [hlL]? # length modifier (optional)
+ [diouxXeEfFgGcrs%] # conversion type
+ '''
+ outtmpl = re.sub(
+ FORMAT_RE.format(numeric_field),
+ r'%({0})s'.format(numeric_field), outtmpl)
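+                    # e.g. if 'view_count' is missing from template_dict,
+                    # '%(view_count)05d' in the template is rewritten to
+                    # '%(view_count)s' so the string 'NA' placeholder can be
+                    # substituted without a TypeError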
+
+ # expand_path translates '%%' into '%' and '$$' into '$'
+ # correspondingly that is not what we want since we need to keep
+ # '%%' intact for template dict substitution step. Working around
+ # with boundary-alike separator hack.
+ sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
+ outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+
+ # outtmpl should be expand_path'ed before template dict substitution
+ # because meta fields may contain env variables we don't want to
+ # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
+ # title "Hello $PATH", we don't want `$PATH` to be expanded.
+ filename = expand_path(outtmpl).replace(sep, '') % template_dict
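+            # e.g. for outtmpl '50%% off - %(title)s.%(ext)s', the '%%' must
+            # reach the '%' substitution on the line above intact so that it
+            # renders as a single literal '%' in the final filename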
+
+ # Temporary fix for #4787
+ # 'Treat' all problem characters by passing filename through preferredencoding
+ # to workaround encoding issues with subprocess on python2 @ Windows
+ if sys.version_info < (3, 0) and sys.platform == 'win32':
+ filename = encodeFilename(filename, True).decode(preferredencoding())
+ return sanitize_path(filename)
+ except ValueError as err:
+ self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
+ return None
+
+ def _match_entry(self, info_dict, incomplete):
+ """ Returns None iff the file should be downloaded """
+
+ video_title = info_dict.get('title', info_dict.get('id', 'video'))
+ if 'title' in info_dict:
+ # This can happen when we're just evaluating the playlist
+ title = info_dict['title']
+ matchtitle = self.params.get('matchtitle', False)
+ if matchtitle:
+ if not re.search(matchtitle, title, re.IGNORECASE):
+ return '"' + title + '" title did not match pattern "' + matchtitle + '"'
+ rejecttitle = self.params.get('rejecttitle', False)
+ if rejecttitle:
+ if re.search(rejecttitle, title, re.IGNORECASE):
+ return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+ date = info_dict.get('upload_date')
+ if date is not None:
+ dateRange = self.params.get('daterange', DateRange())
+ if date not in dateRange:
+ return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+ view_count = info_dict.get('view_count')
+ if view_count is not None:
+ min_views = self.params.get('min_views')
+ if min_views is not None and view_count < min_views:
+ return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
+ max_views = self.params.get('max_views')
+ if max_views is not None and view_count > max_views:
+ return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
+ if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
+ return 'Skipping "%s" because it is age restricted' % video_title
+ if self.in_download_archive(info_dict):
+ return '%s has already been recorded in archive' % video_title
+
+ if not incomplete:
+ match_filter = self.params.get('match_filter')
+ if match_filter is not None:
+ ret = match_filter(info_dict)
+ if ret is not None:
+ return ret
+
+ return None
+
+ @staticmethod
+ def add_extra_info(info_dict, extra_info):
+ '''Set the keys from extra_info in info dict if they are missing'''
+ for key, value in extra_info.items():
+ info_dict.setdefault(key, value)
+
+ def extract_info(self, url, download=True, ie_key=None, extra_info={},
+ process=True, force_generic_extractor=False):
+ """
+ Return a list with a dictionary for each video extracted.
+
+ Arguments:
+ url -- URL to extract
+
+ Keyword arguments:
+ download -- whether to download videos during extraction
+ ie_key -- extractor key hint
+ extra_info -- dictionary containing the extra values to add to each result
+ process -- whether to resolve all unresolved references (URLs, playlist items),
+ must be True for download to work.
+ force_generic_extractor -- force using the generic extractor
+ """
+
+ if not ie_key and force_generic_extractor:
+ ie_key = 'Generic'
+
+ if ie_key:
+ ies = [self.get_info_extractor(ie_key)]
+ else:
+ ies = self._ies
+
+ for ie in ies:
+ if not ie.suitable(url):
+ continue
+
+ ie = self.get_info_extractor(ie.ie_key())
+ if not ie.working():
+ self.report_warning('The program functionality for this site has been marked as broken, '
+ 'and will probably not work.')
+
+ return self.__extract_info(url, ie, download, extra_info, process)
+ else:
+ self.report_error('no suitable InfoExtractor for URL %s' % url)
+
+ def __handle_extraction_exceptions(func):
+ def wrapper(self, *args, **kwargs):
+ try:
+ return func(self, *args, **kwargs)
+ except GeoRestrictedError as e:
+ msg = e.msg
+ if e.countries:
+ msg += '\nThis video is available in %s.' % ', '.join(
+ map(ISO3166Utils.short2full, e.countries))
+ msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
+ self.report_error(msg)
+ except ExtractorError as e: # An error we somewhat expected
+ self.report_error(compat_str(e), e.format_traceback())
+ except MaxDownloadsReached:
+ raise
+ except Exception as e:
+ if self.params.get('ignoreerrors', False):
+ self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
+ else:
+ raise
+ return wrapper
+
+ @__handle_extraction_exceptions
+ def __extract_info(self, url, ie, download, extra_info, process):
+ ie_result = ie.extract(url)
+ if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
+ return
+ if isinstance(ie_result, list):
+ # Backwards compatibility: old IE result format
+ ie_result = {
+ '_type': 'compat_list',
+ 'entries': ie_result,
+ }
+ self.add_default_extra_info(ie_result, ie, url)
+ if process:
+ return self.process_ie_result(ie_result, download, extra_info)
+ else:
+ return ie_result
+
+ def add_default_extra_info(self, ie_result, ie, url):
+ self.add_extra_info(ie_result, {
+ 'extractor': ie.IE_NAME,
+ 'webpage_url': url,
+ 'webpage_url_basename': url_basename(url),
+ 'extractor_key': ie.ie_key(),
+ })
+
+ def process_ie_result(self, ie_result, download=True, extra_info={}):
+ """
+        Take the result of the ie (may be modified) and resolve all unresolved
+        references (URLs, playlist items).
+
+        It will also download the videos if 'download' is true.
+ Returns the resolved ie_result.
+ """
+ result_type = ie_result.get('_type', 'video')
+
+ if result_type in ('url', 'url_transparent'):
+ ie_result['url'] = sanitize_url(ie_result['url'])
+ extract_flat = self.params.get('extract_flat', False)
+ if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
+ or extract_flat is True):
+ self.__forced_printings(
+ ie_result, self.prepare_filename(ie_result),
+ incomplete=True)
+ return ie_result
+
+ if result_type == 'video':
+ self.add_extra_info(ie_result, extra_info)
+ return self.process_video_result(ie_result, download=download)
+ elif result_type == 'url':
+ # We have to add extra_info to the results because it may be
+ # contained in a playlist
+ return self.extract_info(ie_result['url'],
+ download,
+ ie_key=ie_result.get('ie_key'),
+ extra_info=extra_info)
+ elif result_type == 'url_transparent':
+ # Use the information from the embedding page
+ info = self.extract_info(
+ ie_result['url'], ie_key=ie_result.get('ie_key'),
+ extra_info=extra_info, download=False, process=False)
+
+ # extract_info may return None when ignoreerrors is enabled and
+ # extraction failed with an error, don't crash and return early
+ # in this case
+ if not info:
+ return info
+
+ force_properties = dict(
+ (k, v) for k, v in ie_result.items() if v is not None)
+ for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
+ if f in force_properties:
+ del force_properties[f]
+ new_result = info.copy()
+ new_result.update(force_properties)
+
+            # Extracted info may not be a video result (i.e.
+            # info.get('_type', 'video') != 'video') but rather a url or
+            # url_transparent. In such cases outer metadata (from ie_result)
+ # should be propagated to inner one (info). For this to happen
+ # _type of info should be overridden with url_transparent. This
+ # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
+ if new_result.get('_type') == 'url':
+ new_result['_type'] = 'url_transparent'
+
+ return self.process_ie_result(
+ new_result, download=download, extra_info=extra_info)
+ elif result_type in ('playlist', 'multi_video'):
+ # Protect from infinite recursion due to recursively nested playlists
+ # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
+ webpage_url = ie_result['webpage_url']
+ if webpage_url in self._playlist_urls:
+ self.to_screen(
+ '[download] Skipping already downloaded playlist: %s'
+                    % (ie_result.get('title') or ie_result.get('id')))
+ return
+
+ self._playlist_level += 1
+ self._playlist_urls.add(webpage_url)
+ try:
+ return self.__process_playlist(ie_result, download)
+ finally:
+ self._playlist_level -= 1
+ if not self._playlist_level:
+ self._playlist_urls.clear()
+ elif result_type == 'compat_list':
+ self.report_warning(
+ 'Extractor %s returned a compat_list result. '
+ 'It needs to be updated.' % ie_result.get('extractor'))
+
+ def _fixup(r):
+ self.add_extra_info(
+ r,
+ {
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'extractor_key': ie_result['extractor_key'],
+ }
+ )
+ return r
+ ie_result['entries'] = [
+ self.process_ie_result(_fixup(r), download, extra_info)
+ for r in ie_result['entries']
+ ]
+ return ie_result
+ else:
+ raise Exception('Invalid result type: %s' % result_type)
+
+ def __process_playlist(self, ie_result, download):
+ # We process each entry in the playlist
+ playlist = ie_result.get('title') or ie_result.get('id')
+
+ self.to_screen('[download] Downloading playlist: %s' % playlist)
+
+ playlist_results = []
+
+ playliststart = self.params.get('playliststart', 1) - 1
+ playlistend = self.params.get('playlistend')
+ # For backwards compatibility, interpret -1 as whole list
+ if playlistend == -1:
+ playlistend = None
+
+ playlistitems_str = self.params.get('playlist_items')
+ playlistitems = None
+ if playlistitems_str is not None:
+ def iter_playlistitems(format):
+ for string_segment in format.split(','):
+ if '-' in string_segment:
+ start, end = string_segment.split('-')
+ for item in range(int(start), int(end) + 1):
+ yield int(item)
+ else:
+ yield int(string_segment)
+ playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
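+            # e.g. playlist_items='1-3,7' gives playlistitems == [1, 2, 3, 7]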
+
+ ie_entries = ie_result['entries']
+
+ def make_playlistitems_entries(list_ie_entries):
+ num_entries = len(list_ie_entries)
+ return [
+ list_ie_entries[i - 1] for i in playlistitems
+ if -num_entries <= i - 1 < num_entries]
+
+ def report_download(num_entries):
+ self.to_screen(
+ '[%s] playlist %s: Downloading %d videos' %
+ (ie_result['extractor'], playlist, num_entries))
+
+ if isinstance(ie_entries, list):
+ n_all_entries = len(ie_entries)
+ if playlistitems:
+ entries = make_playlistitems_entries(ie_entries)
+ else:
+ entries = ie_entries[playliststart:playlistend]
+ n_entries = len(entries)
+ self.to_screen(
+ '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
+ (ie_result['extractor'], playlist, n_all_entries, n_entries))
+ elif isinstance(ie_entries, PagedList):
+ if playlistitems:
+ entries = []
+ for item in playlistitems:
+ entries.extend(ie_entries.getslice(
+ item - 1, item
+ ))
+ else:
+ entries = ie_entries.getslice(
+ playliststart, playlistend)
+ n_entries = len(entries)
+ report_download(n_entries)
+ else: # iterable
+ if playlistitems:
+ entries = make_playlistitems_entries(list(itertools.islice(
+ ie_entries, 0, max(playlistitems))))
+ else:
+ entries = list(itertools.islice(
+ ie_entries, playliststart, playlistend))
+ n_entries = len(entries)
+ report_download(n_entries)
+
+ if self.params.get('playlistreverse', False):
+ entries = entries[::-1]
+
+ if self.params.get('playlistrandom', False):
+ random.shuffle(entries)
+
+ x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+
+ for i, entry in enumerate(entries, 1):
+ self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
+ # This __x_forwarded_for_ip thing is a bit ugly but requires
+ # minimal changes
+ if x_forwarded_for:
+ entry['__x_forwarded_for_ip'] = x_forwarded_for
+ extra = {
+ 'n_entries': n_entries,
+ 'playlist': playlist,
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
+ 'playlist_uploader': ie_result.get('uploader'),
+ 'playlist_uploader_id': ie_result.get('uploader_id'),
+ 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'extractor_key': ie_result['extractor_key'],
+ }
+
+ reason = self._match_entry(entry, incomplete=True)
+ if reason is not None:
+ self.to_screen('[download] ' + reason)
+ continue
+
+ entry_result = self.__process_iterable_entry(entry, download, extra)
+ # TODO: skip failed (empty) entries?
+ playlist_results.append(entry_result)
+ ie_result['entries'] = playlist_results
+ self.to_screen('[download] Finished downloading playlist: %s' % playlist)
+ return ie_result
+
+ @__handle_extraction_exceptions
+ def __process_iterable_entry(self, entry, download, extra_info):
+ return self.process_ie_result(
+ entry, download=download, extra_info=extra_info)
+
+ def _build_format_filter(self, filter_spec):
+ " Returns a function to filter the formats according to the filter_spec "
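+        # Example specs accepted below: 'height<=720' and 'filesize<50M'
+        # (numeric comparisons), 'ext=mp4' and 'vcodec^=avc1' (string
+        # comparisons); a '?' after the operator, as in 'tbr>?500', also
+        # matches formats where the key is unknown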
+
+ OPERATORS = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '>': operator.gt,
+ '>=': operator.ge,
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
+ $
+ ''' % '|'.join(map(re.escape, OPERATORS.keys())))
+ m = operator_rex.search(filter_spec)
+ if m:
+ try:
+ comparison_value = int(m.group('value'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('value'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('value') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid value %r in format specification %r' % (
+ m.group('value'), filter_spec))
+ op = OPERATORS[m.group('op')]
+
+ if not m:
+ STR_OPERATORS = {
+ '=': operator.eq,
+ '^=': lambda attr, value: attr.startswith(value),
+ '$=': lambda attr, value: attr.endswith(value),
+ '*=': lambda attr, value: value in attr,
+ }
+ str_operator_rex = re.compile(r'''(?x)
+ \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
+ \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
+ \s*(?P<value>[a-zA-Z0-9._-]+)
+ \s*$
+ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
+ m = str_operator_rex.search(filter_spec)
+ if m:
+ comparison_value = m.group('value')
+ str_op = STR_OPERATORS[m.group('op')]
+ if m.group('negation'):
+ op = lambda attr, value: not str_op(attr, value)
+ else:
+ op = str_op
+
+ if not m:
+ raise ValueError('Invalid filter specification %r' % filter_spec)
+
+ def _filter(f):
+ actual_value = f.get(m.group('key'))
+ if actual_value is None:
+ return m.group('none_inclusive')
+ return op(actual_value, comparison_value)
+ return _filter
+
+ def _default_format_spec(self, info_dict, download=True):
+
+ def can_merge():
+ merger = FFmpegMergerPP(self)
+ return merger.available and merger.can_merge()
+
+ def prefer_best():
+ if self.params.get('simulate', False):
+ return False
+ if not download:
+ return False
+ if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
+ return True
+ if info_dict.get('is_live'):
+ return True
+ if not can_merge():
+ return True
+ return False
+
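+        # e.g. when downloading to stdout (outtmpl '-') or for live streams,
+        # merging is impossible, so prefer_best() flips the preference to
+        # plain 'best' over 'bestvideo+bestaudio'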
+ req_format_list = ['bestvideo+bestaudio', 'best']
+ if prefer_best():
+ req_format_list.reverse()
+ return '/'.join(req_format_list)
+
+ def build_format_selector(self, format_spec):
+ def syntax_error(note, start):
+ message = (
+ 'Invalid format specification: '
+ '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
+ return SyntaxError(message)
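+
+        # The selector language parsed below, informally:
+        #   'a/b'    -> PICKFIRST: use the first alternative that matches
+        #   'a+b'    -> MERGE: merge a video format and an audio format
+        #   '(a)'    -> GROUP: grouping for precedence
+        #   'a[...]' -> filters applied to a selector
+        # e.g. 'bestvideo[height<=720]+bestaudio/best'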
+
+ PICKFIRST = 'PICKFIRST'
+ MERGE = 'MERGE'
+ SINGLE = 'SINGLE'
+ GROUP = 'GROUP'
+ FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
+
+ def _parse_filter(tokens):
+ filter_parts = []
+ for type, string, start, _, _ in tokens:
+ if type == tokenize.OP and string == ']':
+ return ''.join(filter_parts)
+ else:
+ filter_parts.append(string)
+
+ def _remove_unused_ops(tokens):
+            # Remove operators that we don't use and join them with the surrounding strings;
+            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
+ ALLOWED_OPS = ('/', '+', ',', '(', ')')
+ last_string, last_start, last_end, last_line = None, None, None, None
+ for type, string, start, end, line in tokens:
+ if type == tokenize.OP and string == '[':
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+ last_string = None
+ yield type, string, start, end, line
+ # everything inside brackets will be handled by _parse_filter
+ for type, string, start, end, line in tokens:
+ yield type, string, start, end, line
+ if type == tokenize.OP and string == ']':
+ break
+ elif type == tokenize.OP and string in ALLOWED_OPS:
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+ last_string = None
+ yield type, string, start, end, line
+ elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
+ if not last_string:
+ last_string = string
+ last_start = start
+ last_end = end
+ else:
+ last_string += string
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+
+ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
+ selectors = []
+ current_selector = None
+ for type, string, start, _, _ in tokens:
+ # ENCODING is only defined in python 3.x
+ if type == getattr(tokenize, 'ENCODING', None):
+ continue
+ elif type in [tokenize.NAME, tokenize.NUMBER]:
+ current_selector = FormatSelector(SINGLE, string, [])
+ elif type == tokenize.OP:
+ if string == ')':
+ if not inside_group:
+ # ')' will be handled by the parentheses group
+ tokens.restore_last_token()
+ break
+ elif inside_merge and string in ['/', ',']:
+ tokens.restore_last_token()
+ break
+ elif inside_choice and string == ',':
+ tokens.restore_last_token()
+ break
+ elif string == ',':
+ if not current_selector:
+ raise syntax_error('"," must follow a format selector', start)
+ selectors.append(current_selector)
+ current_selector = None
+ elif string == '/':
+ if not current_selector:
+ raise syntax_error('"/" must follow a format selector', start)
+ first_choice = current_selector
+ second_choice = _parse_format_selection(tokens, inside_choice=True)
+ current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
+ elif string == '[':
+ if not current_selector:
+ current_selector = FormatSelector(SINGLE, 'best', [])
+ format_filter = _parse_filter(tokens)
+ current_selector.filters.append(format_filter)
+ elif string == '(':
+ if current_selector:
+ raise syntax_error('Unexpected "("', start)
+ group = _parse_format_selection(tokens, inside_group=True)
+ current_selector = FormatSelector(GROUP, group, [])
+ elif string == '+':
+ if inside_merge:
+ raise syntax_error('Unexpected "+"', start)
+ video_selector = current_selector
+ audio_selector = _parse_format_selection(tokens, inside_merge=True)
+ if not video_selector or not audio_selector:
+ raise syntax_error('"+" must be between two format selectors', start)
+ current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
+ else:
+ raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
+ elif type == tokenize.ENDMARKER:
+ break
+ if current_selector:
+ selectors.append(current_selector)
+ return selectors
+
+ def _build_selector_function(selector):
+ if isinstance(selector, list):
+ fs = [_build_selector_function(s) for s in selector]
+
+ def selector_function(ctx):
+ for f in fs:
+ for format in f(ctx):
+ yield format
+ return selector_function
+ elif selector.type == GROUP:
+ selector_function = _build_selector_function(selector.selector)
+ elif selector.type == PICKFIRST:
+ fs = [_build_selector_function(s) for s in selector.selector]
+
+ def selector_function(ctx):
+ for f in fs:
+ picked_formats = list(f(ctx))
+ if picked_formats:
+ return picked_formats
+ return []
+ elif selector.type == SINGLE:
+ format_spec = selector.selector
+
+ def selector_function(ctx):
+ formats = list(ctx['formats'])
+ if not formats:
+ return
+ if format_spec == 'all':
+ for f in formats:
+ yield f
+ elif format_spec in ['best', 'worst', None]:
+ format_idx = 0 if format_spec == 'worst' else -1
+ audiovideo_formats = [
+ f for f in formats
+ if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
+ if audiovideo_formats:
+ yield audiovideo_formats[format_idx]
+                    # for extractors with incomplete formats (audio only (soundcloud)
+                    # or video only (imgur)) we will fall back to the best/worst
+                    # {video,audio}-only format
+ elif ctx['incomplete_formats']:
+ yield formats[format_idx]
+ elif format_spec == 'bestaudio':
+ audio_formats = [
+ f for f in formats
+ if f.get('vcodec') == 'none']
+ if audio_formats:
+ yield audio_formats[-1]
+ elif format_spec == 'worstaudio':
+ audio_formats = [
+ f for f in formats
+ if f.get('vcodec') == 'none']
+ if audio_formats:
+ yield audio_formats[0]
+ elif format_spec == 'bestvideo':
+ video_formats = [
+ f for f in formats
+ if f.get('acodec') == 'none']
+ if video_formats:
+ yield video_formats[-1]
+ elif format_spec == 'worstvideo':
+ video_formats = [
+ f for f in formats
+ if f.get('acodec') == 'none']
+ if video_formats:
+ yield video_formats[0]
+ else:
+ extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
+ if format_spec in extensions:
+ filter_f = lambda f: f['ext'] == format_spec
+ else:
+ filter_f = lambda f: f['format_id'] == format_spec
+ matches = list(filter(filter_f, formats))
+ if matches:
+ yield matches[-1]
+ elif selector.type == MERGE:
+ def _merge(formats_info):
+ format_1, format_2 = [f['format_id'] for f in formats_info]
+ # The first format must contain the video and the
+ # second the audio
+ if formats_info[0].get('vcodec') == 'none':
+ self.report_error('The first format must '
+ 'contain the video, try using '
+ '"-f %s+%s"' % (format_2, format_1))
+ return
+ # Both formats must not be video-only (one has to carry the audio)
+ if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
+ self.report_error(
+ 'Both formats %s and %s are video-only, you must specify "-f video+audio"'
+ % (format_1, format_2))
+ return
+ output_ext = (
+ formats_info[0]['ext']
+ if self.params.get('merge_output_format') is None
+ else self.params['merge_output_format'])
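+ # The merged entry takes its visual properties from the video format and
+ # its audio properties from the audio format; e.g. merging formats '137'
+ # and '140' yields format_id '137+140' (hypothetical format ids).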
+ return {
+ 'requested_formats': formats_info,
+ 'format': '%s+%s' % (formats_info[0].get('format'),
+ formats_info[1].get('format')),
+ 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
+ formats_info[1].get('format_id')),
+ 'width': formats_info[0].get('width'),
+ 'height': formats_info[0].get('height'),
+ 'resolution': formats_info[0].get('resolution'),
+ 'fps': formats_info[0].get('fps'),
+ 'vcodec': formats_info[0].get('vcodec'),
+ 'vbr': formats_info[0].get('vbr'),
+ 'stretched_ratio': formats_info[0].get('stretched_ratio'),
+ 'acodec': formats_info[1].get('acodec'),
+ 'abr': formats_info[1].get('abr'),
+ 'ext': output_ext,
+ }
+ video_selector, audio_selector = map(_build_selector_function, selector.selector)
+
+ def selector_function(ctx):
+ for pair in itertools.product(
+ video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
+ yield _merge(pair)
+
+ filters = [self._build_format_filter(f) for f in selector.filters]
+
+ def final_selector(ctx):
+ ctx_copy = copy.deepcopy(ctx)
+ for _filter in filters:
+ ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
+ return selector_function(ctx_copy)
+ return final_selector
+
+ stream = io.BytesIO(format_spec.encode('utf-8'))
+ try:
+ tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
+ except tokenize.TokenError:
+ raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
+
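+ # Minimal token stream supporting one-token pushback via restore_last_token();
+ # it defines both next() and __next__() so it works on Python 2 and 3.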
+ class TokenIterator(object):
+ def __init__(self, tokens):
+ self.tokens = tokens
+ self.counter = 0
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ if self.counter >= len(self.tokens):
+ raise StopIteration()
+ value = self.tokens[self.counter]
+ self.counter += 1
+ return value
+
+ next = __next__
+
+ def restore_last_token(self):
+ self.counter -= 1
+
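+ # Usage sketch: build_format_selector('best[height<=480]') returns a function
+ # which, given a ctx dict (see process_video_result), yields the matching
+ # format dicts.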
+ parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
+ return _build_selector_function(parsed_selector)
+
+ def _calc_headers(self, info_dict):
+ res = std_headers.copy()
+
+ add_headers = info_dict.get('http_headers')
+ if add_headers:
+ res.update(add_headers)
+
+ cookies = self._calc_cookies(info_dict)
+ if cookies:
+ res['Cookie'] = cookies
+
+ if 'X-Forwarded-For' not in res:
+ x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
+ if x_forwarded_for_ip:
+ res['X-Forwarded-For'] = x_forwarded_for_ip
+
+ return res
+
+ def _calc_cookies(self, info_dict):
+ pr = sanitized_Request(info_dict['url'])
+ self.cookiejar.add_cookie_header(pr)
+ return pr.get_header('Cookie')
+
+ def process_video_result(self, info_dict, download=True):
+ assert info_dict.get('_type', 'video') == 'video'
+
+ if 'id' not in info_dict:
+ raise ExtractorError('Missing "id" field in extractor result')
+ if 'title' not in info_dict:
+ raise ExtractorError('Missing "title" field in extractor result')
+
+ def report_force_conversion(field, field_not, conversion):
+ self.report_warning(
+ '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
+ % (field, field_not, conversion))
+
+ def sanitize_string_field(info, string_field):
+ field = info.get(string_field)
+ if field is None or isinstance(field, compat_str):
+ return
+ report_force_conversion(string_field, 'a string', 'string')
+ info[string_field] = compat_str(field)
+
+ def sanitize_numeric_fields(info):
+ for numeric_field in self._NUMERIC_FIELDS:
+ field = info.get(numeric_field)
+ if field is None or isinstance(field, compat_numeric_types):
+ continue
+ report_force_conversion(numeric_field, 'numeric', 'int')
+ info[numeric_field] = int_or_none(field)
+
+ sanitize_string_field(info_dict, 'id')
+ sanitize_numeric_fields(info_dict)
+
+ if 'playlist' not in info_dict:
+ # It isn't part of a playlist
+ info_dict['playlist'] = None
+ info_dict['playlist_index'] = None
+
+ thumbnails = info_dict.get('thumbnails')
+ if thumbnails is None:
+ thumbnail = info_dict.get('thumbnail')
+ if thumbnail:
+ info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
+ if thumbnails:
+ thumbnails.sort(key=lambda t: (
+ t.get('preference') if t.get('preference') is not None else -1,
+ t.get('width') if t.get('width') is not None else -1,
+ t.get('height') if t.get('height') is not None else -1,
+ t.get('id') if t.get('id') is not None else '', t.get('url')))
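+ # After sorting, thumbnails run from lowest to highest preference/resolution,
+ # so the best thumbnail is the last element (picked as the default further down)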
+ for i, t in enumerate(thumbnails):
+ t['url'] = sanitize_url(t['url'])
+ if t.get('width') and t.get('height'):
+ t['resolution'] = '%dx%d' % (t['width'], t['height'])
+ if t.get('id') is None:
+ t['id'] = '%d' % i
+
+ if self.params.get('list_thumbnails'):
+ self.list_thumbnails(info_dict)
+ return
+
+ thumbnail = info_dict.get('thumbnail')
+ if thumbnail:
+ info_dict['thumbnail'] = sanitize_url(thumbnail)
+ elif thumbnails:
+ info_dict['thumbnail'] = thumbnails[-1]['url']
+
+ if 'display_id' not in info_dict and 'id' in info_dict:
+ info_dict['display_id'] = info_dict['id']
+
+ for ts_key, date_key in (
+ ('timestamp', 'upload_date'),
+ ('release_timestamp', 'release_date'),
+ ):
+ if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+ # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+ # see http://bugs.python.org/issue1646728)
+ try:
+ upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+ info_dict[date_key] = upload_date.strftime('%Y%m%d')
+ except (ValueError, OverflowError, OSError):
+ pass
+
+ # Auto generate title fields corresponding to the *_number fields when missing
+ # in order to always have clean titles. This is very common for TV series.
+ for field in ('chapter', 'season', 'episode'):
+ if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+ info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+
+ for cc_kind in ('subtitles', 'automatic_captions'):
+ cc = info_dict.get(cc_kind)
+ if cc:
+ for _, subtitle in cc.items():
+ for subtitle_format in subtitle:
+ if subtitle_format.get('url'):
+ subtitle_format['url'] = sanitize_url(subtitle_format['url'])
+ if subtitle_format.get('ext') is None:
+ subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
+
+ automatic_captions = info_dict.get('automatic_captions')
+ subtitles = info_dict.get('subtitles')
+
+ if self.params.get('listsubtitles', False):
+ if 'automatic_captions' in info_dict:
+ self.list_subtitles(
+ info_dict['id'], automatic_captions, 'automatic captions')
+ self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
+ return
+
+ info_dict['requested_subtitles'] = self.process_subtitles(
+ info_dict['id'], subtitles, automatic_captions)
+
+ # We now pick which formats have to be downloaded
+ if info_dict.get('formats') is None:
+ # There's only one format available
+ formats = [info_dict]
+ else:
+ formats = info_dict['formats']
+
+ if not formats:
+ raise ExtractorError('No video formats found!')
+
+ def is_wellformed(f):
+ url = f.get('url')
+ if not url:
+ self.report_warning(
+ '"url" field is missing or empty - skipping format, '
+ 'there is an error in extractor')
+ return False
+ if isinstance(url, bytes):
+ sanitize_string_field(f, 'url')
+ return True
+
+ # Filter out malformed formats for better extraction robustness
+ formats = list(filter(is_wellformed, formats))
+
+ formats_dict = {}
+
+ # We check that all the formats have the format and format_id fields
+ for i, format in enumerate(formats):
+ sanitize_string_field(format, 'format_id')
+ sanitize_numeric_fields(format)
+ format['url'] = sanitize_url(format['url'])
+ if not format.get('format_id'):
+ format['format_id'] = compat_str(i)
+ else:
+ # Sanitize format_id from characters used in format selector expression
+ format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
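+ # e.g. a (hypothetical) format_id of 'hls,240p' becomes 'hls_240p' so it
+ # cannot clash with the ',' operator of format selector expressions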
+ format_id = format['format_id']
+ if format_id not in formats_dict:
+ formats_dict[format_id] = []
+ formats_dict[format_id].append(format)
+
+ # Make sure all formats have unique format_id
+ for format_id, ambiguous_formats in formats_dict.items():
+ if len(ambiguous_formats) > 1:
+ for i, format in enumerate(ambiguous_formats):
+ format['format_id'] = '%s-%d' % (format_id, i)
+
+ for i, format in enumerate(formats):
+ if format.get('format') is None:
+ format['format'] = '{id} - {res}{note}'.format(
+ id=format['format_id'],
+ res=self.format_resolution(format),
+ note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
+ )
+ # Automatically determine file extension if missing
+ if format.get('ext') is None:
+ format['ext'] = determine_ext(format['url']).lower()
+ # Automatically determine protocol if missing (useful for format
+ # selection purposes)
+ if format.get('protocol') is None:
+ format['protocol'] = determine_protocol(format)
+ # Add HTTP headers, so that external programs can use them from the
+ # json output
+ full_format_info = info_dict.copy()
+ full_format_info.update(format)
+ format['http_headers'] = self._calc_headers(full_format_info)
+ # Remove private housekeeping stuff
+ if '__x_forwarded_for_ip' in info_dict:
+ del info_dict['__x_forwarded_for_ip']
+
+ # TODO Central sorting goes here
+
+ if formats[0] is not info_dict:
+ # only set the 'formats' field if the original info_dict lists them;
+ # otherwise we end up with a circular reference: the first (and only)
+ # element in the 'formats' field in info_dict would be info_dict itself,
+ # which can't be exported to JSON
+ info_dict['formats'] = formats
+ if self.params.get('listformats'):
+ self.list_formats(info_dict)
+ return
+
+ req_format = self.params.get('format')
+ if req_format is None:
+ req_format = self._default_format_spec(info_dict, download=download)
+ if self.params.get('verbose'):
+ self._write_string('[debug] Default format spec: %s\n' % req_format)
+
+ format_selector = self.build_format_selector(req_format)
+
+ # While in format selection we may need to have an access to the original
+ # format set in order to calculate some metrics or do some processing.
+ # For now we need to be able to guess whether original formats provided
+ # by extractor are incomplete or not (i.e. whether extractor provides only
+ # video-only or audio-only formats) for proper formats selection for
+ # extractors with such incomplete formats (see
+ # https://github.com/ytdl-org/youtube-dl/pull/5556).
+ # Since formats may be filtered during format selection and may not match
+ # the original formats the results may be incorrect. Thus original formats
+ # or pre-calculated metrics should be passed to format selection routines
+ # as well.
+ # We will pass a context object containing all necessary additional data
+ # instead of just formats.
+ # This fixes incorrect format selection issue (see
+ # https://github.com/ytdl-org/youtube-dl/issues/10083).
+ incomplete_formats = (
+ # All formats are video-only or
+ all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
+ # all formats are audio-only
+ or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
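+ # e.g. for an audio-only site every format has vcodec == 'none', so
+ # incomplete_formats is True and 'best' may legitimately yield an
+ # audio-only format (see the SINGLE selector in build_format_selector)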
+
+ ctx = {
+ 'formats': formats,
+ 'incomplete_formats': incomplete_formats,
+ }
+
+ formats_to_download = list(format_selector(ctx))
+ if not formats_to_download:
+ raise ExtractorError('requested format not available',
+ expected=True)
+
+ if download:
+ if len(formats_to_download) > 1:
+ self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
+ for format in formats_to_download:
+ new_info = dict(info_dict)
+ new_info.update(format)
+ self.process_info(new_info)
+ # We update the info dict with the best quality format (backwards compatibility)
+ info_dict.update(formats_to_download[-1])
+ return info_dict
+
+ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
+ """Select the requested subtitles and their format"""
+ available_subs = {}
+ if normal_subtitles and self.params.get('writesubtitles'):
+ available_subs.update(normal_subtitles)
+ if automatic_captions and self.params.get('writeautomaticsub'):
+ for lang, cap_info in automatic_captions.items():
+ if lang not in available_subs:
+ available_subs[lang] = cap_info
+
+ if (not self.params.get('writesubtitles')
+ and not self.params.get('writeautomaticsub')
+ or not available_subs):
+ return None
+
+ if self.params.get('allsubtitles', False):
+ requested_langs = available_subs.keys()
+ else:
+ if self.params.get('subtitleslangs', False):
+ requested_langs = self.params.get('subtitleslangs')
+ elif 'en' in available_subs:
+ requested_langs = ['en']
+ else:
+ requested_langs = [list(available_subs.keys())[0]]
+
+ formats_query = self.params.get('subtitlesformat', 'best')
+ formats_preference = formats_query.split('/') if formats_query else []
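+ # e.g. subtitlesformat 'srt/best' gives formats_preference ['srt', 'best']:
+ # an exact 'srt' match wins, otherwise the last (best) available format is used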
+ subs = {}
+ for lang in requested_langs:
+ formats = available_subs.get(lang)
+ if formats is None:
+ self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+ continue
+ for ext in formats_preference:
+ if ext == 'best':
+ f = formats[-1]
+ break
+ matches = list(filter(lambda f: f['ext'] == ext, formats))
+ if matches:
+ f = matches[-1]
+ break
+ else:
+ f = formats[-1]
+ self.report_warning(
+ 'No subtitle format found matching "%s" for language %s, '
+ 'using %s' % (formats_query, lang, f['ext']))
+ subs[lang] = f
+ return subs
+
+ def __forced_printings(self, info_dict, filename, incomplete):
+ def print_mandatory(field):
+ if (self.params.get('force%s' % field, False)
+ and (not incomplete or info_dict.get(field) is not None)):
+ self.to_stdout(info_dict[field])
+
+ def print_optional(field):
+ if (self.params.get('force%s' % field, False)
+ and info_dict.get(field) is not None):
+ self.to_stdout(info_dict[field])
+
+ print_mandatory('title')
+ print_mandatory('id')
+ if self.params.get('forceurl', False) and not incomplete:
+ if info_dict.get('requested_formats') is not None:
+ for f in info_dict['requested_formats']:
+ self.to_stdout(f['url'] + f.get('play_path', ''))
+ else:
+ # For RTMP URLs, also include the playpath
+ self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+ print_optional('thumbnail')
+ print_optional('description')
+ if self.params.get('forcefilename', False) and filename is not None:
+ self.to_stdout(filename)
+ if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
+ self.to_stdout(formatSeconds(info_dict['duration']))
+ print_mandatory('format')
+ if self.params.get('forcejson', False):
+ self.to_stdout(json.dumps(info_dict))
+
+ def process_info(self, info_dict):
+ """Process a single resolved IE result."""
+
+ assert info_dict.get('_type', 'video') == 'video'
+
+ max_downloads = self.params.get('max_downloads')
+ if max_downloads is not None:
+ if self._num_downloads >= int(max_downloads):
+ raise MaxDownloadsReached()
+
+ # TODO: backward compatibility, to be removed
+ info_dict['fulltitle'] = info_dict['title']
+
+ if 'format' not in info_dict:
+ info_dict['format'] = info_dict['ext']
+
+ reason = self._match_entry(info_dict, incomplete=False)
+ if reason is not None:
+ self.to_screen('[download] ' + reason)
+ return
+
+ self._num_downloads += 1
+
+ info_dict['_filename'] = filename = self.prepare_filename(info_dict)
+
+ # Forced printings
+ self.__forced_printings(info_dict, filename, incomplete=False)
+
+ # Do nothing else if in simulate mode
+ if self.params.get('simulate', False):
+ return
+
+ if filename is None:
+ return
+
+ def ensure_dir_exists(path):
+ try:
+ dn = os.path.dirname(path)
+ if dn and not os.path.exists(dn):
+ os.makedirs(dn)
+ return True
+ except (OSError, IOError) as err:
+ if isinstance(err, OSError) and err.errno == errno.EEXIST:
+ return True
+ self.report_error('unable to create directory ' + error_to_compat_str(err))
+ return False
+
+ if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
+ return
+
+ if self.params.get('writedescription', False):
+ descfn = replace_extension(filename, 'description', info_dict.get('ext'))
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
+ self.to_screen('[info] Video description is already present')
+ elif info_dict.get('description') is None:
+ self.report_warning('There\'s no description to write.')
+ else:
+ try:
+ self.to_screen('[info] Writing video description to: ' + descfn)
+ with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+ descfile.write(info_dict['description'])
+ except (OSError, IOError):
+ self.report_error('Cannot write description file ' + descfn)
+ return
+
+ if self.params.get('writeannotations', False):
+ annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
+ self.to_screen('[info] Video annotations are already present')
+ elif not info_dict.get('annotations'):
+ self.report_warning('There are no annotations to write.')
+ else:
+ try:
+ self.to_screen('[info] Writing video annotations to: ' + annofn)
+ with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+ annofile.write(info_dict['annotations'])
+ except (KeyError, TypeError):
+ self.report_warning('There are no annotations to write.')
+ except (OSError, IOError):
+ self.report_error('Cannot write annotations file: ' + annofn)
+ return
+
+ subtitles_are_requested = any([self.params.get('writesubtitles', False),
+ self.params.get('writeautomaticsub')])
+
+ if subtitles_are_requested and info_dict.get('requested_subtitles'):
+ # subtitle download errors are already handled as troubles in the relevant IE,
+ # so processing silently goes on when used with an IE that doesn't support them
+ subtitles = info_dict['requested_subtitles']
+ ie = self.get_info_extractor(info_dict['extractor_key'])
+ for sub_lang, sub_info in subtitles.items():
+ sub_format = sub_info['ext']
+ sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+ self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
+ else:
+ self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
+ if sub_info.get('data') is not None:
+ try:
+ # Use newline='' to prevent conversion of newline characters
+ # See https://github.com/ytdl-org/youtube-dl/issues/10268
+ with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+ subfile.write(sub_info['data'])
+ except (OSError, IOError):
+ self.report_error('Cannot write subtitles file ' + sub_filename)
+ return
+ else:
+ try:
+ sub_data = ie._request_webpage(
+ sub_info['url'], info_dict['id'], note=False).read()
+ with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+ subfile.write(sub_data)
+ except (ExtractorError, IOError, OSError, ValueError) as err:
+ self.report_warning('Unable to download subtitle for "%s": %s' %
+ (sub_lang, error_to_compat_str(err)))
+ continue
+
+ if self.params.get('writeinfojson', False):
+ infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
+ self.to_screen('[info] Video description metadata is already present')
+ else:
+ self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
+ try:
+ write_json_file(self.filter_requested_info(info_dict), infofn)
+ except (OSError, IOError):
+ self.report_error('Cannot write metadata to JSON file ' + infofn)
+ return
+
+ self._write_thumbnails(info_dict, filename)
+
+ if not self.params.get('skip_download', False):
+ try:
+ def dl(name, info):
+ fd = get_suitable_downloader(info, self.params)(self, self.params)
+ for ph in self._progress_hooks:
+ fd.add_progress_hook(ph)
+ if self.params.get('verbose'):
+ self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
+ return fd.download(name, info)
+
+ if info_dict.get('requested_formats') is not None:
+ downloaded = []
+ success = True
+ merger = FFmpegMergerPP(self)
+ if not merger.available:
+ postprocessors = []
+ self.report_warning('You have requested multiple '
+ 'formats but ffmpeg or avconv are not installed.'
+ ' The formats won\'t be merged.')
+ else:
+ postprocessors = [merger]
+
+ def compatible_formats(formats):
+ video, audio = formats
+ # Check extension
+ video_ext, audio_ext = video.get('ext'), audio.get('ext')
+ if video_ext and audio_ext:
+ COMPATIBLE_EXTS = (
+ ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
+ ('webm',)  # one-element tuple; a bare ('webm') is just the string 'webm'
+ )
+ for exts in COMPATIBLE_EXTS:
+ if video_ext in exts and audio_ext in exts:
+ return True
+ # TODO: Check acodec/vcodec
+ return False
+
+ filename_real_ext = os.path.splitext(filename)[1][1:]
+ filename_wo_ext = (
+ os.path.splitext(filename)[0]
+ if filename_real_ext == info_dict['ext']
+ else filename)
+ requested_formats = info_dict['requested_formats']
+ if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
+ info_dict['ext'] = 'mkv'
+ self.report_warning(
+ 'Requested formats are incompatible for merge and will be merged into mkv.')
+ # Ensure filename always has a correct extension for successful merge
+ filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
+ if os.path.exists(encodeFilename(filename)):
+ self.to_screen(
+ '[download] %s has already been downloaded and '
+ 'merged' % filename)
+ else:
+ for f in requested_formats:
+ new_info = dict(info_dict)
+ new_info.update(f)
+ fname = prepend_extension(
+ self.prepare_filename(new_info),
+ 'f%s' % f['format_id'], new_info['ext'])
+ if not ensure_dir_exists(fname):
+ return
+ downloaded.append(fname)
+ partial_success = dl(fname, new_info)
+ success = success and partial_success
+ info_dict['__postprocessors'] = postprocessors
+ info_dict['__files_to_merge'] = downloaded
+ else:
+ # Just a single file
+ success = dl(filename, info_dict)
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self.report_error('unable to download video data: %s' % error_to_compat_str(err))
+ return
+ except (OSError, IOError) as err:
+ raise UnavailableVideoError(err)
+ except (ContentTooShortError, ) as err:
+ self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+ return
+
+ if success and filename != '-':
+ # Fixup content
+ fixup_policy = self.params.get('fixup')
+ if fixup_policy is None:
+ fixup_policy = 'detect_or_warn'
+
+ INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
+
+ stretched_ratio = info_dict.get('stretched_ratio')
+ if stretched_ratio is not None and stretched_ratio != 1:
+ if fixup_policy == 'warn':
+ self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
+ info_dict['id'], stretched_ratio))
+ elif fixup_policy == 'detect_or_warn':
+ stretched_pp = FFmpegFixupStretchedPP(self)
+ if stretched_pp.available:
+ info_dict.setdefault('__postprocessors', [])
+ info_dict['__postprocessors'].append(stretched_pp)
+ else:
+ self.report_warning(
+ '%s: Non-uniform pixel ratio (%s). %s'
+ % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
+ else:
+ assert fixup_policy in ('ignore', 'never')
+
+ if (info_dict.get('requested_formats') is None
+ and info_dict.get('container') == 'm4a_dash'):
+ if fixup_policy == 'warn':
+ self.report_warning(
+ '%s: writing DASH m4a. '
+ 'Only some players support this container.'
+ % info_dict['id'])
+ elif fixup_policy == 'detect_or_warn':
+ fixup_pp = FFmpegFixupM4aPP(self)
+ if fixup_pp.available:
+ info_dict.setdefault('__postprocessors', [])
+ info_dict['__postprocessors'].append(fixup_pp)
+ else:
+ self.report_warning(
+ '%s: writing DASH m4a. '
+ 'Only some players support this container. %s'
+ % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
+ else:
+ assert fixup_policy in ('ignore', 'never')
+
+ if (info_dict.get('protocol') == 'm3u8_native'
+ or (info_dict.get('protocol') == 'm3u8'
+ and self.params.get('hls_prefer_native'))):
+ if fixup_policy == 'warn':
+ self.report_warning('%s: malformed AAC bitstream detected.' % (
+ info_dict['id']))
+ elif fixup_policy == 'detect_or_warn':
+ fixup_pp = FFmpegFixupM3u8PP(self)
+ if fixup_pp.available:
+ info_dict.setdefault('__postprocessors', [])
+ info_dict['__postprocessors'].append(fixup_pp)
+ else:
+ self.report_warning(
+ '%s: malformed AAC bitstream detected. %s'
+ % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
+ else:
+ assert fixup_policy in ('ignore', 'never')
+
+ try:
+ self.post_process(filename, info_dict)
+ except (PostProcessingError) as err:
+ self.report_error('postprocessing: %s' % str(err))
+ return
+ self.record_download_archive(info_dict)
+
+ def download(self, url_list):
+ """Download a given list of URLs."""
+ outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+ if (len(url_list) > 1
+ and outtmpl != '-'
+ and '%' not in outtmpl
+ and self.params.get('max_downloads') != 1):
+ raise SameFileError(outtmpl)
+
+ for url in url_list:
+ try:
+ # extract_info also performs the actual downloads
+ res = self.extract_info(
+ url, force_generic_extractor=self.params.get('force_generic_extractor', False))
+ except UnavailableVideoError:
+ self.report_error('unable to download video')
+ except MaxDownloadsReached:
+ self.to_screen('[info] Maximum number of downloaded files reached.')
+ raise
+ else:
+ if self.params.get('dump_single_json', False):
+ self.to_stdout(json.dumps(res))
+
+ return self._download_retcode
+
+ def download_with_info_file(self, info_filename):
+ with contextlib.closing(fileinput.FileInput(
+ [info_filename], mode='r',
+ openhook=fileinput.hook_encoded('utf-8'))) as f:
+ # FileInput doesn't have a read method, we can't call json.load
+ info = self.filter_requested_info(json.loads('\n'.join(f)))
+ try:
+ self.process_ie_result(info, download=True)
+ except DownloadError:
+ webpage_url = info.get('webpage_url')
+ if webpage_url is not None:
+ self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
+ return self.download([webpage_url])
+ else:
+ raise
+ return self._download_retcode
+
+ @staticmethod
+ def filter_requested_info(info_dict):
+ return dict(
+ (k, v) for k, v in info_dict.items()
+ if k not in ['requested_formats', 'requested_subtitles'])
+
+ def post_process(self, filename, ie_info):
+ """Run all the postprocessors on the given file."""
+ info = dict(ie_info)
+ info['filepath'] = filename
+ pps_chain = []
+ if ie_info.get('__postprocessors') is not None:
+ pps_chain.extend(ie_info['__postprocessors'])
+ pps_chain.extend(self._pps)
+ for pp in pps_chain:
+ files_to_delete = []
+ try:
+ files_to_delete, info = pp.run(info)
+ except PostProcessingError as e:
+ self.report_error(e.msg)
+ if files_to_delete and not self.params.get('keepvideo', False):
+ for old_filename in files_to_delete:
+ self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
+ try:
+ os.remove(encodeFilename(old_filename))
+ except (IOError, OSError):
+ self.report_warning('Unable to remove downloaded original file')
+
+ def _make_archive_id(self, info_dict):
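+ # Builds an archive id of the form '<extractor> <video_id>' (lower-cased),
+ # e.g. 'youtube abc123' (hypothetical id); returns None when the video id
+ # or the extractor cannot be determined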
+ video_id = info_dict.get('id')
+ if not video_id:
+ return
+ # Future-proof against any change in case
+ # and keep backwards compatibility with prior versions
+ extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
+ if extractor is None:
+ url = str_or_none(info_dict.get('url'))
+ if not url:
+ return
+ # Try to find matching extractor for the URL and take its ie_key
+ for ie in self._ies:
+ if ie.suitable(url):
+ extractor = ie.ie_key()
+ break
+ else:
+ return
+ return extractor.lower() + ' ' + video_id
+
+ def in_download_archive(self, info_dict):
+ fn = self.params.get('download_archive')
+ if fn is None:
+ return False
+
+ vid_id = self._make_archive_id(info_dict)
+ if not vid_id:
+ return False # Incomplete video information
+
+ try:
+ with locked_file(fn, 'r', encoding='utf-8') as archive_file:
+ for line in archive_file:
+ if line.strip() == vid_id:
+ return True
+ except IOError as ioe:
+ if ioe.errno != errno.ENOENT:
+ raise
+ return False
+
+ def record_download_archive(self, info_dict):
+ fn = self.params.get('download_archive')
+ if fn is None:
+ return
+ vid_id = self._make_archive_id(info_dict)
+ assert vid_id
+ with locked_file(fn, 'a', encoding='utf-8') as archive_file:
+ archive_file.write(vid_id + '\n')
+
+ @staticmethod
+ def format_resolution(format, default='unknown'):
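+ # e.g. {'width': 1280, 'height': 720} -> '1280x720'; {'height': 720} -> '720p';
+ # {'vcodec': 'none'} -> 'audio only' (illustrative inputs)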
+ if format.get('vcodec') == 'none':
+ return 'audio only'
+ if format.get('resolution') is not None:
+ return format['resolution']
+ if format.get('height') is not None:
+ if format.get('width') is not None:
+ res = '%sx%s' % (format['width'], format['height'])
+ else:
+ res = '%sp' % format['height']
+ elif format.get('width') is not None:
+ res = '%dx?' % format['width']
+ else:
+ res = default
+ return res
+
+ def _format_note(self, fdict):
+ res = ''
+ if fdict.get('ext') in ['f4f', 'f4m']:
+ res += '(unsupported) '
+ if fdict.get('language'):
+ if res:
+ res += ' '
+ res += '[%s] ' % fdict['language']
+ if fdict.get('format_note') is not None:
+ res += fdict['format_note'] + ' '
+ if fdict.get('tbr') is not None:
+ res += '%4dk ' % fdict['tbr']
+ if fdict.get('container') is not None:
+ if res:
+ res += ', '
+ res += '%s container' % fdict['container']
+ if (fdict.get('vcodec') is not None
+ and fdict.get('vcodec') != 'none'):
+ if res:
+ res += ', '
+ res += fdict['vcodec']
+ if fdict.get('vbr') is not None:
+ res += '@'
+ elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
+ res += 'video@'
+ if fdict.get('vbr') is not None:
+ res += '%4dk' % fdict['vbr']
+ if fdict.get('fps') is not None:
+ if res:
+ res += ', '
+ res += '%sfps' % fdict['fps']
+ if fdict.get('acodec') is not None:
+ if res:
+ res += ', '
+ if fdict['acodec'] == 'none':
+ res += 'video only'
+ else:
+ res += '%-5s' % fdict['acodec']
+ elif fdict.get('abr') is not None:
+ if res:
+ res += ', '
+ res += 'audio'
+ if fdict.get('abr') is not None:
+ res += '@%3dk' % fdict['abr']
+ if fdict.get('asr') is not None:
+ res += ' (%5dHz)' % fdict['asr']
+ if fdict.get('filesize') is not None:
+ if res:
+ res += ', '
+ res += format_bytes(fdict['filesize'])
+ elif fdict.get('filesize_approx') is not None:
+ if res:
+ res += ', '
+ res += '~' + format_bytes(fdict['filesize_approx'])
+ return res
+
+ def list_formats(self, info_dict):
+ formats = info_dict.get('formats', [info_dict])
+ table = [
+ [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
+ for f in formats
+ if f.get('preference') is None or f['preference'] >= -1000]
+ if len(formats) > 1:
+ table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
+
+ header_line = ['format code', 'extension', 'resolution', 'note']
+ self.to_screen(
+ '[info] Available formats for %s:\n%s' %
+ (info_dict['id'], render_table(header_line, table)))
+
+ def list_thumbnails(self, info_dict):
+ thumbnails = info_dict.get('thumbnails')
+ if not thumbnails:
+ self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
+ return
+
+ self.to_screen(
+ '[info] Thumbnails for %s:' % info_dict['id'])
+ self.to_screen(render_table(
+ ['ID', 'width', 'height', 'URL'],
+ [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
+
+ def list_subtitles(self, video_id, subtitles, name='subtitles'):
+ if not subtitles:
+ self.to_screen('%s has no %s' % (video_id, name))
+ return
+ self.to_screen(
+ 'Available %s for %s:' % (name, video_id))
+ self.to_screen(render_table(
+ ['Language', 'formats'],
+ [[lang, ', '.join(f['ext'] for f in reversed(formats))]
+ for lang, formats in subtitles.items()]))
+
+ def urlopen(self, req):
+ """ Start an HTTP download """
+ if isinstance(req, compat_basestring):
+ req = sanitized_Request(req)
+ return self._opener.open(req, timeout=self._socket_timeout)
+
+ def print_debug_header(self):
+ if not self.params.get('verbose'):
+ return
+
+ if type('') is not compat_str:
+ # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
+ self.report_warning(
+ 'Your Python is broken! Update to a newer and supported version')
+
+ stdout_encoding = getattr(
+ sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
+ encoding_str = (
+ '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
+ locale.getpreferredencoding(),
+ sys.getfilesystemencoding(),
+ stdout_encoding,
+ self.get_encoding()))
+ write_string(encoding_str, encoding=None)
+
+ self._write_string('[debug] hypervideo version ' + __version__ + '\n')
+ if _LAZY_LOADER:
+ self._write_string('[debug] Lazy loading extractors enabled' + '\n')
+ try:
+ sp = subprocess.Popen(
+ ['git', 'rev-parse', '--short', 'HEAD'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ cwd=os.path.dirname(os.path.abspath(__file__)))
+ out, err = sp.communicate()
+ out = out.decode().strip()
+ if re.match('[0-9a-f]+', out):
+ self._write_string('[debug] Git HEAD: ' + out + '\n')
+ except Exception:
+ try:
+ sys.exc_clear()
+ except Exception:
+ pass
+
+ def python_implementation():
+ impl_name = platform.python_implementation()
+ if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
+ return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
+ return impl_name
+
+ self._write_string('[debug] Python version %s (%s) - %s\n' % (
+ platform.python_version(), python_implementation(),
+ platform_name()))
+
+ exe_versions = FFmpegPostProcessor.get_versions(self)
+ exe_versions['rtmpdump'] = rtmpdump_version()
+ exe_versions['phantomjs'] = PhantomJSwrapper._version()
+ exe_str = ', '.join(
+ '%s %s' % (exe, v)
+ for exe, v in sorted(exe_versions.items())
+ if v
+ )
+ if not exe_str:
+ exe_str = 'none'
+ self._write_string('[debug] exe versions: %s\n' % exe_str)
+
+ proxy_map = {}
+ for handler in self._opener.handlers:
+ if hasattr(handler, 'proxies'):
+ proxy_map.update(handler.proxies)
+ self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
+
+ if self.params.get('call_home', False):
+ ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
+ self._write_string('[debug] Public IP address: %s\n' % ipaddr)
+ latest_version = self.urlopen(
+ 'https://yt-dl.org/latest/version').read().decode('utf-8')
+ if version_tuple(latest_version) > version_tuple(__version__):
+ self.report_warning(
+ 'You are using an outdated version (newest version: %s)! '
+ 'See https://yt-dl.org/update if you need help updating.' %
+ latest_version)
+
+ def _setup_opener(self):
+ timeout_val = self.params.get('socket_timeout')
+ self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
+
+ opts_cookiefile = self.params.get('cookiefile')
+ opts_proxy = self.params.get('proxy')
+
+ if opts_cookiefile is None:
+ self.cookiejar = compat_cookiejar.CookieJar()
+ else:
+ opts_cookiefile = expand_path(opts_cookiefile)
+ self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
+ if os.access(opts_cookiefile, os.R_OK):
+ self.cookiejar.load(ignore_discard=True, ignore_expires=True)
+
+ cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
+ if opts_proxy is not None:
+ if opts_proxy == '':
+ proxies = {}
+ else:
+ proxies = {'http': opts_proxy, 'https': opts_proxy}
+ else:
+ proxies = compat_urllib_request.getproxies()
+ # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
+ if 'http' in proxies and 'https' not in proxies:
+ proxies['https'] = proxies['http']
+ proxy_handler = PerRequestProxyHandler(proxies)
+
+ debuglevel = 1 if self.params.get('debug_printtraffic') else 0
+ https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
+ ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
+ redirect_handler = YoutubeDLRedirectHandler()
+ data_handler = compat_urllib_request_DataHandler()
+
+ # When passing our own FileHandler instance, build_opener won't add the
+ # default FileHandler and allows us to disable the file protocol, which
+ # can be used for malicious purposes (see
+ # https://github.com/ytdl-org/youtube-dl/issues/8227)
+ file_handler = compat_urllib_request.FileHandler()
+
+ def file_open(*args, **kwargs):
+ raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in hypervideo for security reasons')
+ file_handler.file_open = file_open
+
+ opener = compat_urllib_request.build_opener(
+ proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
+
+ # Delete the default user-agent header, which would otherwise apply in
+ # cases where our custom HTTP handler doesn't come into play
+ # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
+ opener.addheaders = []
+ self._opener = opener
+
+ def encode(self, s):
+ if isinstance(s, bytes):
+ return s # Already encoded
+
+ try:
+ return s.encode(self.get_encoding())
+ except UnicodeEncodeError as err:
+ err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
+ raise
+
+ def get_encoding(self):
+ encoding = self.params.get('encoding')
+ if encoding is None:
+ encoding = preferredencoding()
+ return encoding
+
+ def _write_thumbnails(self, info_dict, filename):
+ if self.params.get('writethumbnail', False):
+ thumbnails = info_dict.get('thumbnails')
+ if thumbnails:
+ thumbnails = [thumbnails[-1]]
+ elif self.params.get('write_all_thumbnails', False):
+ thumbnails = info_dict.get('thumbnails')
+ else:
+ return
+
+ if not thumbnails:
+ # No thumbnails present, so return immediately
+ return
+
+ for t in thumbnails:
+ thumb_ext = determine_ext(t['url'], 'jpg')
+ suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
+ thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
+ t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
+
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
+ self.to_screen('[%s] %s: Thumbnail %sis already present' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id))
+ else:
+ self.to_screen('[%s] %s: Downloading thumbnail %s...' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id))
+ try:
+ uf = self.urlopen(t['url'])
+ with open(encodeFilename(thumb_filename), 'wb') as thumbf:
+ shutil.copyfileobj(uf, thumbf)
+ self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self.report_warning('Unable to download thumbnail "%s": %s' %
+ (t['url'], error_to_compat_str(err)))
diff --git a/hypervideo_dl/__init__.py b/hypervideo_dl/__init__.py
new file mode 100644
index 0000000..70c53fc
--- /dev/null
+++ b/hypervideo_dl/__init__.py
@@ -0,0 +1,478 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+__license__ = 'CC0-1.0'
+
+import codecs
+import io
+import os
+import random
+import sys
+
+
+from .options import (
+ parseOpts,
+)
+from .compat import (
+ compat_getpass,
+ compat_shlex_split,
+ workaround_optparse_bug9161,
+)
+from .utils import (
+ DateRange,
+ decodeOption,
+ DEFAULT_OUTTMPL,
+ DownloadError,
+ expand_path,
+ match_filter_func,
+ MaxDownloadsReached,
+ preferredencoding,
+ read_batch_urls,
+ SameFileError,
+ setproctitle,
+ std_headers,
+ write_string,
+ render_table,
+)
+from .downloader import (
+ FileDownloader,
+)
+from .extractor import gen_extractors, list_extractors
+from .extractor.adobepass import MSO_INFO
+from .YoutubeDL import YoutubeDL
+
+
+def _real_main(argv=None):
+ # Compatibility fixes for Windows
+ if sys.platform == 'win32':
+ # https://github.com/ytdl-org/youtube-dl/issues/820
+ codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
+
+ workaround_optparse_bug9161()
+
+ setproctitle('hypervideo')
+
+ parser, opts, args = parseOpts(argv)
+
+ # Set user agent
+ if opts.user_agent is not None:
+ std_headers['User-Agent'] = opts.user_agent
+
+ # Set referer
+ if opts.referer is not None:
+ std_headers['Referer'] = opts.referer
+
+ # Custom HTTP headers
+ if opts.headers is not None:
+ for h in opts.headers:
+ if ':' not in h:
+ parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
+ key, value = h.split(':', 1)
+ if opts.verbose:
+ write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
+ std_headers[key] = value
+
+ # Dump user agent
+ if opts.dump_user_agent:
+ write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
+ sys.exit(0)
+
+ # Batch file verification
+ batch_urls = []
+ if opts.batchfile is not None:
+ try:
+ if opts.batchfile == '-':
+ batchfd = sys.stdin
+ else:
+ batchfd = io.open(
+ expand_path(opts.batchfile),
+ 'r', encoding='utf-8', errors='ignore')
+ batch_urls = read_batch_urls(batchfd)
+ if opts.verbose:
+ write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
+ except IOError:
+ sys.exit('ERROR: batch file %s could not be read' % opts.batchfile)
+ all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already stripped in read_batch_urls
+ _enc = preferredencoding()
+ all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
+
+ if opts.list_extractors:
+ for ie in list_extractors(opts.age_limit):
+ write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
+ matchedUrls = [url for url in all_urls if ie.suitable(url)]
+ for mu in matchedUrls:
+ write_string(' ' + mu + '\n', out=sys.stdout)
+ sys.exit(0)
+ if opts.list_extractor_descriptions:
+ for ie in list_extractors(opts.age_limit):
+ if not ie._WORKING:
+ continue
+ desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
+ if desc is False:
+ continue
+ if hasattr(ie, 'SEARCH_KEY'):
+ _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
+ _COUNTS = ('', '5', '10', 'all')
+ desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
+ write_string(desc + '\n', out=sys.stdout)
+ sys.exit(0)
+ if opts.ap_list_mso:
+ table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
+ write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
+ sys.exit(0)
+
+ # Conflicting, missing and erroneous options
+ if opts.usenetrc and (opts.username is not None or opts.password is not None):
+ parser.error('using .netrc conflicts with giving username/password')
+ if opts.password is not None and opts.username is None:
+ parser.error('account username missing\n')
+ if opts.ap_password is not None and opts.ap_username is None:
+ parser.error('TV Provider account username missing\n')
+ if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
+ parser.error('using output template conflicts with using title, video ID or auto number')
+ if opts.autonumber_size is not None:
+ if opts.autonumber_size <= 0:
+ parser.error('auto number size must be positive')
+ if opts.autonumber_start is not None:
+ if opts.autonumber_start < 0:
+ parser.error('auto number start must be positive or 0')
+ if opts.usetitle and opts.useid:
+ parser.error('using title conflicts with using video ID')
+ if opts.username is not None and opts.password is None:
+ opts.password = compat_getpass('Type account password and press [Return]: ')
+ if opts.ap_username is not None and opts.ap_password is None:
+ opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
+ if opts.ratelimit is not None:
+ numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
+ if numeric_limit is None:
+ parser.error('invalid rate limit specified')
+ opts.ratelimit = numeric_limit
+ if opts.min_filesize is not None:
+ numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
+ if numeric_limit is None:
+ parser.error('invalid min_filesize specified')
+ opts.min_filesize = numeric_limit
+ if opts.max_filesize is not None:
+ numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
+ if numeric_limit is None:
+ parser.error('invalid max_filesize specified')
+ opts.max_filesize = numeric_limit
+ if opts.sleep_interval is not None:
+ if opts.sleep_interval < 0:
+ parser.error('sleep interval must be positive or 0')
+ if opts.max_sleep_interval is not None:
+ if opts.max_sleep_interval < 0:
+ parser.error('max sleep interval must be positive or 0')
+ if opts.sleep_interval is None:
+ parser.error('min sleep interval must be specified, use --min-sleep-interval')
+ if opts.max_sleep_interval < opts.sleep_interval:
+ parser.error('max sleep interval must be greater than or equal to min sleep interval')
+ else:
+ opts.max_sleep_interval = opts.sleep_interval
+ if opts.ap_mso and opts.ap_mso not in MSO_INFO:
+ parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
+
+ def parse_retries(retries):
+ if retries in ('inf', 'infinite'):
+ parsed_retries = float('inf')
+ else:
+ try:
+ parsed_retries = int(retries)
+ except (TypeError, ValueError):
+ parser.error('invalid retry count specified')
+ return parsed_retries
+ if opts.retries is not None:
+ opts.retries = parse_retries(opts.retries)
+ if opts.fragment_retries is not None:
+ opts.fragment_retries = parse_retries(opts.fragment_retries)
+ if opts.buffersize is not None:
+ numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
+ if numeric_buffersize is None:
+ parser.error('invalid buffer size specified')
+ opts.buffersize = numeric_buffersize
+ if opts.http_chunk_size is not None:
+ numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size)
+ if not numeric_chunksize:
+ parser.error('invalid http chunk size specified')
+ opts.http_chunk_size = numeric_chunksize
+ if opts.playliststart <= 0:
+ raise ValueError('Playlist start must be positive')
+ if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
+ raise ValueError('Playlist end must be greater than playlist start')
+ if opts.extractaudio:
+ if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
+ parser.error('invalid audio format specified')
+ if opts.audioquality:
+ opts.audioquality = opts.audioquality.strip('k').strip('K')
+ if not opts.audioquality.isdigit():
+ parser.error('invalid audio quality specified')
+ if opts.recodevideo is not None:
+ if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
+ parser.error('invalid video recode format specified')
+ if opts.convertsubtitles is not None:
+ if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
+ parser.error('invalid subtitle format specified')
+
+ if opts.date is not None:
+ date = DateRange.day(opts.date)
+ else:
+ date = DateRange(opts.dateafter, opts.datebefore)
+
+ # Do not download videos when there are audio-only formats
+ if opts.extractaudio and not opts.keepvideo and opts.format is None:
+ opts.format = 'bestaudio/best'
+
+ # --all-sub automatically sets --write-sub if --write-auto-sub is not given
+ # this was the old behaviour if only --all-sub was given.
+ if opts.allsubtitles and not opts.writeautomaticsub:
+ opts.writesubtitles = True
+
+ outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
+ or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
+ or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
+ or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
+ or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
+ or (opts.useid and '%(id)s.%(ext)s')
+ or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
+ or DEFAULT_OUTTMPL)
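+ # e.g. with opts.usetitle set (and no explicit outtmpl, no autonumber and
+ # format != '-1') the template resolves to '%(title)s-%(id)s.%(ext)s'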
+ if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
+ parser.error('Cannot download a video and extract audio into the same'
+ ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
+ ' template'.format(outtmpl))
+
+ any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
+ any_printing = opts.print_json
+ download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
+
+ # PostProcessors
+ postprocessors = []
+ if opts.metafromtitle:
+ postprocessors.append({
+ 'key': 'MetadataFromTitle',
+ 'titleformat': opts.metafromtitle
+ })
+ if opts.extractaudio:
+ postprocessors.append({
+ 'key': 'FFmpegExtractAudio',
+ 'preferredcodec': opts.audioformat,
+ 'preferredquality': opts.audioquality,
+ 'nopostoverwrites': opts.nopostoverwrites,
+ })
+ if opts.recodevideo:
+ postprocessors.append({
+ 'key': 'FFmpegVideoConvertor',
+ 'preferedformat': opts.recodevideo,
+ })
+ # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
+ # FFmpegExtractAudioPP as containers before conversion may not support
+ # metadata (3gp, webm, etc.)
+ # And this post-processor should be placed before other metadata
+ # manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of
+ # extra metadata. By default ffmpeg preserves metadata applicable for both
+ # source and target containers. From this point the container won't change,
+ # so metadata can be added here.
+ if opts.addmetadata:
+ postprocessors.append({'key': 'FFmpegMetadata'})
+ if opts.convertsubtitles:
+ postprocessors.append({
+ 'key': 'FFmpegSubtitlesConvertor',
+ 'format': opts.convertsubtitles,
+ })
+ if opts.embedsubtitles:
+ postprocessors.append({
+ 'key': 'FFmpegEmbedSubtitle',
+ })
+ if opts.embedthumbnail:
+ already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
+ postprocessors.append({
+ 'key': 'EmbedThumbnail',
+ 'already_have_thumbnail': already_have_thumbnail
+ })
+ if not already_have_thumbnail:
+ opts.writethumbnail = True
+ # XAttrMetadataPP should be run after post-processors that may change file
+ # contents
+ if opts.xattrs:
+ postprocessors.append({'key': 'XAttrMetadata'})
+ # Please keep ExecAfterDownload towards the bottom, as it allows the user to
+ # modify the final file in any way; if the user removes the file before a
+ # later post-processor runs, it might cause problems.
+ if opts.exec_cmd:
+ postprocessors.append({
+ 'key': 'ExecAfterDownload',
+ 'exec_cmd': opts.exec_cmd,
+ })
+ external_downloader_args = None
+ if opts.external_downloader_args:
+ external_downloader_args = compat_shlex_split(opts.external_downloader_args)
+ postprocessor_args = None
+ if opts.postprocessor_args:
+ postprocessor_args = compat_shlex_split(opts.postprocessor_args)
+ match_filter = (
+ None if opts.match_filter is None
+ else match_filter_func(opts.match_filter))
+
+ ydl_opts = {
+ 'usenetrc': opts.usenetrc,
+ 'username': opts.username,
+ 'password': opts.password,
+ 'twofactor': opts.twofactor,
+ 'videopassword': opts.videopassword,
+ 'ap_mso': opts.ap_mso,
+ 'ap_username': opts.ap_username,
+ 'ap_password': opts.ap_password,
+ 'quiet': (opts.quiet or any_getting or any_printing),
+ 'no_warnings': opts.no_warnings,
+ 'forceurl': opts.geturl,
+ 'forcetitle': opts.gettitle,
+ 'forceid': opts.getid,
+ 'forcethumbnail': opts.getthumbnail,
+ 'forcedescription': opts.getdescription,
+ 'forceduration': opts.getduration,
+ 'forcefilename': opts.getfilename,
+ 'forceformat': opts.getformat,
+ 'forcejson': opts.dumpjson or opts.print_json,
+ 'dump_single_json': opts.dump_single_json,
+ 'simulate': opts.simulate or any_getting,
+ 'skip_download': opts.skip_download,
+ 'format': opts.format,
+ 'listformats': opts.listformats,
+ 'outtmpl': outtmpl,
+ 'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
+ 'autonumber_size': opts.autonumber_size,
+ 'autonumber_start': opts.autonumber_start,
+ 'restrictfilenames': opts.restrictfilenames,
+ 'ignoreerrors': opts.ignoreerrors,
+ 'force_generic_extractor': opts.force_generic_extractor,
+ 'ratelimit': opts.ratelimit,
+ 'nooverwrites': opts.nooverwrites,
+ 'retries': opts.retries,
+ 'fragment_retries': opts.fragment_retries,
+ 'skip_unavailable_fragments': opts.skip_unavailable_fragments,
+ 'keep_fragments': opts.keep_fragments,
+ 'buffersize': opts.buffersize,
+ 'noresizebuffer': opts.noresizebuffer,
+ 'http_chunk_size': opts.http_chunk_size,
+ 'continuedl': opts.continue_dl,
+ 'noprogress': opts.noprogress,
+ 'progress_with_newline': opts.progress_with_newline,
+ 'playliststart': opts.playliststart,
+ 'playlistend': opts.playlistend,
+ 'playlistreverse': opts.playlist_reverse,
+ 'playlistrandom': opts.playlist_random,
+ 'noplaylist': opts.noplaylist,
+ 'logtostderr': opts.outtmpl == '-',
+ 'consoletitle': opts.consoletitle,
+ 'nopart': opts.nopart,
+ 'updatetime': opts.updatetime,
+ 'writedescription': opts.writedescription,
+ 'writeannotations': opts.writeannotations,
+ 'writeinfojson': opts.writeinfojson,
+ 'writethumbnail': opts.writethumbnail,
+ 'write_all_thumbnails': opts.write_all_thumbnails,
+ 'writesubtitles': opts.writesubtitles,
+ 'writeautomaticsub': opts.writeautomaticsub,
+ 'allsubtitles': opts.allsubtitles,
+ 'listsubtitles': opts.listsubtitles,
+ 'subtitlesformat': opts.subtitlesformat,
+ 'subtitleslangs': opts.subtitleslangs,
+ 'matchtitle': decodeOption(opts.matchtitle),
+ 'rejecttitle': decodeOption(opts.rejecttitle),
+ 'max_downloads': opts.max_downloads,
+ 'prefer_free_formats': opts.prefer_free_formats,
+ 'verbose': opts.verbose,
+ 'dump_intermediate_pages': opts.dump_intermediate_pages,
+ 'write_pages': opts.write_pages,
+ 'test': opts.test,
+ 'keepvideo': opts.keepvideo,
+ 'min_filesize': opts.min_filesize,
+ 'max_filesize': opts.max_filesize,
+ 'min_views': opts.min_views,
+ 'max_views': opts.max_views,
+ 'daterange': date,
+ 'cachedir': opts.cachedir,
+ 'youtube_print_sig_code': opts.youtube_print_sig_code,
+ 'age_limit': opts.age_limit,
+ 'download_archive': download_archive_fn,
+ 'cookiefile': opts.cookiefile,
+ 'nocheckcertificate': opts.no_check_certificate,
+ 'prefer_insecure': opts.prefer_insecure,
+ 'proxy': opts.proxy,
+ 'socket_timeout': opts.socket_timeout,
+ 'bidi_workaround': opts.bidi_workaround,
+ 'debug_printtraffic': opts.debug_printtraffic,
+ 'prefer_ffmpeg': opts.prefer_ffmpeg,
+ 'include_ads': opts.include_ads,
+ 'default_search': opts.default_search,
+ 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
+ 'encoding': opts.encoding,
+ 'extract_flat': opts.extract_flat,
+ 'mark_watched': opts.mark_watched,
+ 'merge_output_format': opts.merge_output_format,
+ 'postprocessors': postprocessors,
+ 'fixup': opts.fixup,
+ 'source_address': opts.source_address,
+ 'call_home': opts.call_home,
+ 'sleep_interval': opts.sleep_interval,
+ 'max_sleep_interval': opts.max_sleep_interval,
+ 'external_downloader': opts.external_downloader,
+ 'list_thumbnails': opts.list_thumbnails,
+ 'playlist_items': opts.playlist_items,
+ 'xattr_set_filesize': opts.xattr_set_filesize,
+ 'match_filter': match_filter,
+ 'no_color': opts.no_color,
+ 'ffmpeg_location': opts.ffmpeg_location,
+ 'hls_prefer_native': opts.hls_prefer_native,
+ 'hls_use_mpegts': opts.hls_use_mpegts,
+ 'external_downloader_args': external_downloader_args,
+ 'postprocessor_args': postprocessor_args,
+ 'cn_verification_proxy': opts.cn_verification_proxy,
+ 'geo_verification_proxy': opts.geo_verification_proxy,
+ 'config_location': opts.config_location,
+ 'geo_bypass': opts.geo_bypass,
+ 'geo_bypass_country': opts.geo_bypass_country,
+ 'geo_bypass_ip_block': opts.geo_bypass_ip_block,
+ # just for deprecation check
+ 'autonumber': opts.autonumber if opts.autonumber is True else None,
+ 'usetitle': opts.usetitle if opts.usetitle is True else None,
+ }
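+
+    # The mapping above passes the parsed command-line options straight through
+    # as YoutubeDL params; the YoutubeDL class docstring documents these keys.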
+
+ with YoutubeDL(ydl_opts) as ydl:
+
+ # Remove cache dir
+ if opts.rm_cachedir:
+ ydl.cache.remove()
+
+        # Nothing to do: no URLs given and no info JSON file to load
+        if (len(all_urls) < 1) and (opts.load_info_filename is None):
+ ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
+ parser.error(
+ 'You must provide at least one URL.\n'
+ 'Type hypervideo --help to see a list of all options.')
+
+ try:
+ if opts.load_info_filename is not None:
+ retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
+ else:
+ retcode = ydl.download(all_urls)
+ except MaxDownloadsReached:
+            ydl.to_screen('--max-downloads limit reached, aborting.')
+ retcode = 101
+
+ sys.exit(retcode)
+
+
+def main(argv=None):
+ try:
+ _real_main(argv)
+ except DownloadError:
+ sys.exit(1)
+ except SameFileError:
+ sys.exit('ERROR: fixed output name but more than one file to download')
+ except KeyboardInterrupt:
+ sys.exit('\nERROR: Interrupted by user')
+
+
+__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
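+
+# A minimal embedding sketch (the options and URL are illustrative only):
+#
+#   from hypervideo_dl import YoutubeDL
+#
+#   with YoutubeDL({'format': 'best', 'quiet': True}) as ydl:
+#       ydl.download(['https://example.com/some-video'])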
diff --git a/hypervideo_dl/__main__.py b/hypervideo_dl/__main__.py
new file mode 100755
index 0000000..e3b35e2
--- /dev/null
+++ b/hypervideo_dl/__main__.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+# Execute with
+# $ python hypervideo_dl/__main__.py (2.6+)
+# $ python -m hypervideo_dl (2.7+)
+
+import sys
+
+if __package__ is None and not hasattr(sys, 'frozen'):
+ # direct call of __main__.py
+ import os.path
+ path = os.path.realpath(os.path.abspath(__file__))
+ sys.path.insert(0, os.path.dirname(os.path.dirname(path)))
+
+import hypervideo_dl
+
+if __name__ == '__main__':
+ hypervideo_dl.main()
diff --git a/hypervideo_dl/aes.py b/hypervideo_dl/aes.py
new file mode 100644
index 0000000..461bb6d
--- /dev/null
+++ b/hypervideo_dl/aes.py
@@ -0,0 +1,361 @@
+from __future__ import unicode_literals
+
+from math import ceil
+
+from .compat import compat_b64decode
+from .utils import bytes_to_intlist, intlist_to_bytes
+
+BLOCK_SIZE_BYTES = 16
+
+
+def aes_ctr_decrypt(data, key, counter):
+ """
+    Decrypt with AES in counter (CTR) mode
+
+    @param {int[]} data cipher
+    @param {int[]} key 16/24/32-Byte cipher key
+    @param {instance} counter Instance whose next_value() method returns the next 16-Byte counter block
+ @returns {int[]} decrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+ decrypted_data = []
+ for i in range(block_count):
+ counter_block = counter.next_value()
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ block += [0] * (BLOCK_SIZE_BYTES - len(block))
+
+ cipher_counter_block = aes_encrypt(counter_block, expanded_key)
+ decrypted_data += xor(block, cipher_counter_block)
+ decrypted_data = decrypted_data[:len(data)]
+
+ return decrypted_data
+
+
+def aes_cbc_decrypt(data, key, iv):
+ """
+    Decrypt with AES in CBC mode
+
+ @param {int[]} data cipher
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte IV
+ @returns {int[]} decrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+ decrypted_data = []
+ previous_cipher_block = iv
+ for i in range(block_count):
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ block += [0] * (BLOCK_SIZE_BYTES - len(block))
+
+ decrypted_block = aes_decrypt(block, expanded_key)
+ decrypted_data += xor(decrypted_block, previous_cipher_block)
+ previous_cipher_block = block
+ decrypted_data = decrypted_data[:len(data)]
+
+ return decrypted_data
+
+
+def aes_cbc_encrypt(data, key, iv):
+ """
+    Encrypt with AES in CBC mode, using PKCS#7 padding
+
+ @param {int[]} data cleartext
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte IV
+ @returns {int[]} encrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+ encrypted_data = []
+ previous_cipher_block = iv
+ for i in range(block_count):
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ remaining_length = BLOCK_SIZE_BYTES - len(block)
+ block += [remaining_length] * remaining_length
+ mixed_block = xor(block, previous_cipher_block)
+
+ encrypted_block = aes_encrypt(mixed_block, expanded_key)
+ encrypted_data += encrypted_block
+
+ previous_cipher_block = encrypted_block
+
+ return encrypted_data
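+
+# A minimal CBC round-trip sketch (the key/iv values are illustrative only):
+#
+#   key = bytes_to_intlist(b'0123456789abcdef')        # 16-Byte AES-128 key
+#   iv = bytes_to_intlist(b'\x00' * BLOCK_SIZE_BYTES)
+#   ct = aes_cbc_encrypt(bytes_to_intlist(b'secret'), key, iv)
+#   pt = aes_cbc_decrypt(ct, key, iv)
+#   assert intlist_to_bytes(pt[:-pt[-1]]) == b'secret'  # strip PKCS#7 padding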
+
+
+def key_expansion(data):
+ """
+ Generate key schedule
+
+ @param {int[]} data 16/24/32-Byte cipher key
+ @returns {int[]} 176/208/240-Byte expanded key
+ """
+ data = data[:] # copy
+ rcon_iteration = 1
+ key_size_bytes = len(data)
+ expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
+
+ while len(data) < expanded_key_size_bytes:
+ temp = data[-4:]
+ temp = key_schedule_core(temp, rcon_iteration)
+ rcon_iteration += 1
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ for _ in range(3):
+ temp = data[-4:]
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ if key_size_bytes == 32:
+ temp = data[-4:]
+ temp = sub_bytes(temp)
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
+ temp = data[-4:]
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+ data = data[:expanded_key_size_bytes]
+
+ return data
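+
+# e.g. a 16-Byte key expands to 11 round keys: (16 // 4 + 7) * 16 == 176 Bytes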
+
+
+def aes_encrypt(data, expanded_key):
+ """
+    Encrypt one block with AES
+
+ @param {int[]} data 16-Byte state
+ @param {int[]} expanded_key 176/208/240-Byte expanded key
+ @returns {int[]} 16-Byte cipher
+ """
+ rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+
+ data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
+ for i in range(1, rounds + 1):
+ data = sub_bytes(data)
+ data = shift_rows(data)
+ if i != rounds:
+ data = mix_columns(data)
+ data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
+
+ return data
+
+
+def aes_decrypt(data, expanded_key):
+ """
+ Decrypt one block with aes
+    Decrypt one block with AES
+ @param {int[]} data 16-Byte cipher
+ @param {int[]} expanded_key 176/208/240-Byte expanded key
+ @returns {int[]} 16-Byte state
+ """
+ rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+
+ for i in range(rounds, 0, -1):
+ data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
+ if i != rounds:
+ data = mix_columns_inv(data)
+ data = shift_rows_inv(data)
+ data = sub_bytes_inv(data)
+ data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
+
+ return data
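+
+# aes_decrypt inverts aes_encrypt for a single block, e.g.:
+#
+#   ek = key_expansion(list(range(16)))
+#   assert aes_decrypt(aes_encrypt(list(range(16)), ek), ek) == list(range(16))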
+
+
+def aes_decrypt_text(data, password, key_size_bytes):
+ """
+ Decrypt text
+ - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
+    - The cipher key is derived by encrypting the first 16 Bytes of 'password'
+      with the first 'key_size_bytes' Bytes from 'password' (zero-padded if necessary)
+ - Mode of operation is 'counter'
+
+ @param {str} data Base64 encoded string
+ @param {str,unicode} password Password (will be encoded with utf-8)
+ @param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
+ @returns {str} Decrypted data
+ """
+ NONCE_LENGTH_BYTES = 8
+
+ data = bytes_to_intlist(compat_b64decode(data))
+ password = bytes_to_intlist(password.encode('utf-8'))
+
+ key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
+ key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
+
+ nonce = data[:NONCE_LENGTH_BYTES]
+ cipher = data[NONCE_LENGTH_BYTES:]
+
+ class Counter(object):
+ __value = nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
+
+ def next_value(self):
+ temp = self.__value
+ self.__value = inc(self.__value)
+ return temp
+
+ decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
+ plaintext = intlist_to_bytes(decrypted_data)
+
+ return plaintext
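+
+# Typical call shape (the payload and password below are made up):
+#
+#   plaintext = aes_decrypt_text(b64_payload, 'some-password', 16)  # AES-128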
+
+
+RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
+SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+ 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+ 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+ 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+ 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+ 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+ 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+ 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+ 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+ 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+ 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+ 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+ 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+ 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+ 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+ 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
+SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
+MIX_COLUMN_MATRIX = ((0x2, 0x3, 0x1, 0x1),
+ (0x1, 0x2, 0x3, 0x1),
+ (0x1, 0x1, 0x2, 0x3),
+ (0x3, 0x1, 0x1, 0x2))
+MIX_COLUMN_MATRIX_INV = ((0xE, 0xB, 0xD, 0x9),
+ (0x9, 0xE, 0xB, 0xD),
+ (0xD, 0x9, 0xE, 0xB),
+ (0xB, 0xD, 0x9, 0xE))
+RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
+ 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
+ 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
+ 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
+ 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
+ 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
+ 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
+ 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
+ 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
+ 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
+ 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
+ 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
+ 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
+ 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
+ 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
+ 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01)
+RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
+ 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
+ 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
+ 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
+ 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
+ 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
+ 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
+ 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
+ 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
+ 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
+ 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
+ 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
+ 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
+ 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
+ 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
+ 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
+
+
+def sub_bytes(data):
+ return [SBOX[x] for x in data]
+
+
+def sub_bytes_inv(data):
+ return [SBOX_INV[x] for x in data]
+
+
+def rotate(data):
+ return data[1:] + [data[0]]
+
+
+def key_schedule_core(data, rcon_iteration):
+ data = rotate(data)
+ data = sub_bytes(data)
+ data[0] = data[0] ^ RCON[rcon_iteration]
+
+ return data
+
+
+def xor(data1, data2):
+ return [x ^ y for x, y in zip(data1, data2)]
+
+
+def rijndael_mul(a, b):
+    if a == 0 or b == 0:
+        return 0
+    # multiply in GF(2^8) via the Rijndael log/exp tables
+    return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
+
+
+def mix_column(data, matrix):
+ data_mixed = []
+ for row in range(4):
+ mixed = 0
+ for column in range(4):
+            # in GF(2^8), xor serves as both addition and subtraction
+            mixed ^= rijndael_mul(data[column], matrix[row][column])
+ data_mixed.append(mixed)
+ return data_mixed
+
+
+def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
+ data_mixed = []
+ for i in range(4):
+ column = data[i * 4: (i + 1) * 4]
+ data_mixed += mix_column(column, matrix)
+ return data_mixed
+
+
+def mix_columns_inv(data):
+ return mix_columns(data, MIX_COLUMN_MATRIX_INV)
+
+
+def shift_rows(data):
+ data_shifted = []
+ for column in range(4):
+ for row in range(4):
+ data_shifted.append(data[((column + row) & 0b11) * 4 + row])
+ return data_shifted
+
+
+def shift_rows_inv(data):
+ data_shifted = []
+ for column in range(4):
+ for row in range(4):
+ data_shifted.append(data[((column - row) & 0b11) * 4 + row])
+ return data_shifted
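+
+# With the column-major state layout used here, shift_rows rotates row r of
+# the state left by r positions; shift_rows_inv rotates it back right by r.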
+
+
+def inc(data):
+ data = data[:] # copy
+ for i in range(len(data) - 1, -1, -1):
+ if data[i] == 255:
+ data[i] = 0
+ else:
+ data[i] = data[i] + 1
+ break
+ return data
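+
+# e.g. inc([0x00, 0xff, 0xff]) == [0x01, 0x00, 0x00] (big-endian carry)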
+
+
+__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
diff --git a/hypervideo_dl/cache.py b/hypervideo_dl/cache.py
new file mode 100644
index 0000000..81cd297
--- /dev/null
+++ b/hypervideo_dl/cache.py
@@ -0,0 +1,96 @@
+from __future__ import unicode_literals
+
+import errno
+import io
+import json
+import os
+import re
+import shutil
+import traceback
+
+from .compat import compat_getenv
+from .utils import (
+ expand_path,
+ write_json_file,
+)
+
+
+class Cache(object):
+ def __init__(self, ydl):
+ self._ydl = ydl
+
+ def _get_root_dir(self):
+ res = self._ydl.params.get('cachedir')
+ if res is None:
+ cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
+ res = os.path.join(cache_root, 'hypervideo')
+ return expand_path(res)
+
+ def _get_cache_fn(self, section, key, dtype):
+ assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
+ 'invalid section %r' % section
+ assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
+ return os.path.join(
+ self._get_root_dir(), section, '%s.%s' % (key, dtype))
+
+ @property
+ def enabled(self):
+ return self._ydl.params.get('cachedir') is not False
+
+ def store(self, section, key, data, dtype='json'):
+ assert dtype in ('json',)
+
+ if not self.enabled:
+ return
+
+ fn = self._get_cache_fn(section, key, dtype)
+ try:
+ try:
+ os.makedirs(os.path.dirname(fn))
+ except OSError as ose:
+ if ose.errno != errno.EEXIST:
+ raise
+ write_json_file(data, fn)
+ except Exception:
+ tb = traceback.format_exc()
+ self._ydl.report_warning(
+ 'Writing cache to %r failed: %s' % (fn, tb))
+
+ def load(self, section, key, dtype='json', default=None):
+ assert dtype in ('json',)
+
+ if not self.enabled:
+ return default
+
+ cache_fn = self._get_cache_fn(section, key, dtype)
+ try:
+ try:
+ with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
+ return json.load(cachef)
+ except ValueError:
+ try:
+ file_size = os.path.getsize(cache_fn)
+ except (OSError, IOError) as oe:
+ file_size = str(oe)
+ self._ydl.report_warning(
+ 'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
+ except IOError:
+ pass # No cache available
+
+ return default
+
+ def remove(self):
+ if not self.enabled:
+ self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
+ return
+
+ cachedir = self._get_root_dir()
+ if not any((term in cachedir) for term in ('cache', 'tmp')):
+ raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
+
+ self._ydl.to_screen(
+ 'Removing cache dir %s .' % cachedir, skip_eol=True)
+ if os.path.exists(cachedir):
+ self._ydl.to_screen('.', skip_eol=True)
+ shutil.rmtree(cachedir)
+ self._ydl.to_screen('.')
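+
+# Typical use from extractor code (the section and key names are illustrative):
+#
+#   self._downloader.cache.store('youtube-sigfuncs', func_id, code)
+#   code = self._downloader.cache.load('youtube-sigfuncs', func_id)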
diff --git a/hypervideo_dl/compat.py b/hypervideo_dl/compat.py
new file mode 100644
index 0000000..97ab37a
--- /dev/null
+++ b/hypervideo_dl/compat.py
@@ -0,0 +1,3060 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import binascii
+import collections
+import ctypes
+import email
+import getpass
+import io
+import itertools
+import optparse
+import os
+import platform
+import re
+import shlex
+import shutil
+import socket
+import struct
+import subprocess
+import sys
+import xml.etree.ElementTree
+
+
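+# Each shim below binds a compat_* alias to the Python 3 name when available
+# and falls back to the Python 2 equivalent otherwise, e.g.:
+#
+#   from hypervideo_dl.compat import compat_urllib_parse_urlparse
+#   host = compat_urllib_parse_urlparse('https://example.com/x').netloc
+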
+try:
+ import urllib.request as compat_urllib_request
+except ImportError: # Python 2
+ import urllib2 as compat_urllib_request
+
+try:
+ import urllib.error as compat_urllib_error
+except ImportError: # Python 2
+ import urllib2 as compat_urllib_error
+
+try:
+ import urllib.parse as compat_urllib_parse
+except ImportError: # Python 2
+ import urllib as compat_urllib_parse
+
+try:
+ from urllib.parse import urlparse as compat_urllib_parse_urlparse
+except ImportError: # Python 2
+ from urlparse import urlparse as compat_urllib_parse_urlparse
+
+try:
+ import urllib.parse as compat_urlparse
+except ImportError: # Python 2
+ import urlparse as compat_urlparse
+
+try:
+ import urllib.response as compat_urllib_response
+except ImportError: # Python 2
+ import urllib as compat_urllib_response
+
+try:
+ import http.cookiejar as compat_cookiejar
+except ImportError: # Python 2
+ import cookielib as compat_cookiejar
+
+if sys.version_info[0] == 2:
+ class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
+ def __init__(self, version, name, value, *args, **kwargs):
+ if isinstance(name, compat_str):
+ name = name.encode()
+ if isinstance(value, compat_str):
+ value = value.encode()
+ compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
+else:
+ compat_cookiejar_Cookie = compat_cookiejar.Cookie
+
+try:
+ import http.cookies as compat_cookies
+except ImportError: # Python 2
+ import Cookie as compat_cookies
+
+if sys.version_info[0] == 2:
+ class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
+ def load(self, rawdata):
+ if isinstance(rawdata, compat_str):
+ rawdata = str(rawdata)
+ return super(compat_cookies_SimpleCookie, self).load(rawdata)
+else:
+ compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
+
+try:
+ import html.entities as compat_html_entities
+except ImportError: # Python 2
+ import htmlentitydefs as compat_html_entities
+
+try: # Python >= 3.3
+ compat_html_entities_html5 = compat_html_entities.html5
+except AttributeError:
+ # Copied from CPython 3.5.1 html/entities.py
+ compat_html_entities_html5 = {
+ 'Aacute': '\xc1',
+ 'aacute': '\xe1',
+ 'Aacute;': '\xc1',
+ 'aacute;': '\xe1',
+ 'Abreve;': '\u0102',
+ 'abreve;': '\u0103',
+ 'ac;': '\u223e',
+ 'acd;': '\u223f',
+ 'acE;': '\u223e\u0333',
+ 'Acirc': '\xc2',
+ 'acirc': '\xe2',
+ 'Acirc;': '\xc2',
+ 'acirc;': '\xe2',
+ 'acute': '\xb4',
+ 'acute;': '\xb4',
+ 'Acy;': '\u0410',
+ 'acy;': '\u0430',
+ 'AElig': '\xc6',
+ 'aelig': '\xe6',
+ 'AElig;': '\xc6',
+ 'aelig;': '\xe6',
+ 'af;': '\u2061',
+ 'Afr;': '\U0001d504',
+ 'afr;': '\U0001d51e',
+ 'Agrave': '\xc0',
+ 'agrave': '\xe0',
+ 'Agrave;': '\xc0',
+ 'agrave;': '\xe0',
+ 'alefsym;': '\u2135',
+ 'aleph;': '\u2135',
+ 'Alpha;': '\u0391',
+ 'alpha;': '\u03b1',
+ 'Amacr;': '\u0100',
+ 'amacr;': '\u0101',
+ 'amalg;': '\u2a3f',
+ 'AMP': '&',
+ 'amp': '&',
+ 'AMP;': '&',
+ 'amp;': '&',
+ 'And;': '\u2a53',
+ 'and;': '\u2227',
+ 'andand;': '\u2a55',
+ 'andd;': '\u2a5c',
+ 'andslope;': '\u2a58',
+ 'andv;': '\u2a5a',
+ 'ang;': '\u2220',
+ 'ange;': '\u29a4',
+ 'angle;': '\u2220',
+ 'angmsd;': '\u2221',
+ 'angmsdaa;': '\u29a8',
+ 'angmsdab;': '\u29a9',
+ 'angmsdac;': '\u29aa',
+ 'angmsdad;': '\u29ab',
+ 'angmsdae;': '\u29ac',
+ 'angmsdaf;': '\u29ad',
+ 'angmsdag;': '\u29ae',
+ 'angmsdah;': '\u29af',
+ 'angrt;': '\u221f',
+ 'angrtvb;': '\u22be',
+ 'angrtvbd;': '\u299d',
+ 'angsph;': '\u2222',
+ 'angst;': '\xc5',
+ 'angzarr;': '\u237c',
+ 'Aogon;': '\u0104',
+ 'aogon;': '\u0105',
+ 'Aopf;': '\U0001d538',
+ 'aopf;': '\U0001d552',
+ 'ap;': '\u2248',
+ 'apacir;': '\u2a6f',
+ 'apE;': '\u2a70',
+ 'ape;': '\u224a',
+ 'apid;': '\u224b',
+ 'apos;': "'",
+ 'ApplyFunction;': '\u2061',
+ 'approx;': '\u2248',
+ 'approxeq;': '\u224a',
+ 'Aring': '\xc5',
+ 'aring': '\xe5',
+ 'Aring;': '\xc5',
+ 'aring;': '\xe5',
+ 'Ascr;': '\U0001d49c',
+ 'ascr;': '\U0001d4b6',
+ 'Assign;': '\u2254',
+ 'ast;': '*',
+ 'asymp;': '\u2248',
+ 'asympeq;': '\u224d',
+ 'Atilde': '\xc3',
+ 'atilde': '\xe3',
+ 'Atilde;': '\xc3',
+ 'atilde;': '\xe3',
+ 'Auml': '\xc4',
+ 'auml': '\xe4',
+ 'Auml;': '\xc4',
+ 'auml;': '\xe4',
+ 'awconint;': '\u2233',
+ 'awint;': '\u2a11',
+ 'backcong;': '\u224c',
+ 'backepsilon;': '\u03f6',
+ 'backprime;': '\u2035',
+ 'backsim;': '\u223d',
+ 'backsimeq;': '\u22cd',
+ 'Backslash;': '\u2216',
+ 'Barv;': '\u2ae7',
+ 'barvee;': '\u22bd',
+ 'Barwed;': '\u2306',
+ 'barwed;': '\u2305',
+ 'barwedge;': '\u2305',
+ 'bbrk;': '\u23b5',
+ 'bbrktbrk;': '\u23b6',
+ 'bcong;': '\u224c',
+ 'Bcy;': '\u0411',
+ 'bcy;': '\u0431',
+ 'bdquo;': '\u201e',
+ 'becaus;': '\u2235',
+ 'Because;': '\u2235',
+ 'because;': '\u2235',
+ 'bemptyv;': '\u29b0',
+ 'bepsi;': '\u03f6',
+ 'bernou;': '\u212c',
+ 'Bernoullis;': '\u212c',
+ 'Beta;': '\u0392',
+ 'beta;': '\u03b2',
+ 'beth;': '\u2136',
+ 'between;': '\u226c',
+ 'Bfr;': '\U0001d505',
+ 'bfr;': '\U0001d51f',
+ 'bigcap;': '\u22c2',
+ 'bigcirc;': '\u25ef',
+ 'bigcup;': '\u22c3',
+ 'bigodot;': '\u2a00',
+ 'bigoplus;': '\u2a01',
+ 'bigotimes;': '\u2a02',
+ 'bigsqcup;': '\u2a06',
+ 'bigstar;': '\u2605',
+ 'bigtriangledown;': '\u25bd',
+ 'bigtriangleup;': '\u25b3',
+ 'biguplus;': '\u2a04',
+ 'bigvee;': '\u22c1',
+ 'bigwedge;': '\u22c0',
+ 'bkarow;': '\u290d',
+ 'blacklozenge;': '\u29eb',
+ 'blacksquare;': '\u25aa',
+ 'blacktriangle;': '\u25b4',
+ 'blacktriangledown;': '\u25be',
+ 'blacktriangleleft;': '\u25c2',
+ 'blacktriangleright;': '\u25b8',
+ 'blank;': '\u2423',
+ 'blk12;': '\u2592',
+ 'blk14;': '\u2591',
+ 'blk34;': '\u2593',
+ 'block;': '\u2588',
+ 'bne;': '=\u20e5',
+ 'bnequiv;': '\u2261\u20e5',
+ 'bNot;': '\u2aed',
+ 'bnot;': '\u2310',
+ 'Bopf;': '\U0001d539',
+ 'bopf;': '\U0001d553',
+ 'bot;': '\u22a5',
+ 'bottom;': '\u22a5',
+ 'bowtie;': '\u22c8',
+ 'boxbox;': '\u29c9',
+ 'boxDL;': '\u2557',
+ 'boxDl;': '\u2556',
+ 'boxdL;': '\u2555',
+ 'boxdl;': '\u2510',
+ 'boxDR;': '\u2554',
+ 'boxDr;': '\u2553',
+ 'boxdR;': '\u2552',
+ 'boxdr;': '\u250c',
+ 'boxH;': '\u2550',
+ 'boxh;': '\u2500',
+ 'boxHD;': '\u2566',
+ 'boxHd;': '\u2564',
+ 'boxhD;': '\u2565',
+ 'boxhd;': '\u252c',
+ 'boxHU;': '\u2569',
+ 'boxHu;': '\u2567',
+ 'boxhU;': '\u2568',
+ 'boxhu;': '\u2534',
+ 'boxminus;': '\u229f',
+ 'boxplus;': '\u229e',
+ 'boxtimes;': '\u22a0',
+ 'boxUL;': '\u255d',
+ 'boxUl;': '\u255c',
+ 'boxuL;': '\u255b',
+ 'boxul;': '\u2518',
+ 'boxUR;': '\u255a',
+ 'boxUr;': '\u2559',
+ 'boxuR;': '\u2558',
+ 'boxur;': '\u2514',
+ 'boxV;': '\u2551',
+ 'boxv;': '\u2502',
+ 'boxVH;': '\u256c',
+ 'boxVh;': '\u256b',
+ 'boxvH;': '\u256a',
+ 'boxvh;': '\u253c',
+ 'boxVL;': '\u2563',
+ 'boxVl;': '\u2562',
+ 'boxvL;': '\u2561',
+ 'boxvl;': '\u2524',
+ 'boxVR;': '\u2560',
+ 'boxVr;': '\u255f',
+ 'boxvR;': '\u255e',
+ 'boxvr;': '\u251c',
+ 'bprime;': '\u2035',
+ 'Breve;': '\u02d8',
+ 'breve;': '\u02d8',
+ 'brvbar': '\xa6',
+ 'brvbar;': '\xa6',
+ 'Bscr;': '\u212c',
+ 'bscr;': '\U0001d4b7',
+ 'bsemi;': '\u204f',
+ 'bsim;': '\u223d',
+ 'bsime;': '\u22cd',
+ 'bsol;': '\\',
+ 'bsolb;': '\u29c5',
+ 'bsolhsub;': '\u27c8',
+ 'bull;': '\u2022',
+ 'bullet;': '\u2022',
+ 'bump;': '\u224e',
+ 'bumpE;': '\u2aae',
+ 'bumpe;': '\u224f',
+ 'Bumpeq;': '\u224e',
+ 'bumpeq;': '\u224f',
+ 'Cacute;': '\u0106',
+ 'cacute;': '\u0107',
+ 'Cap;': '\u22d2',
+ 'cap;': '\u2229',
+ 'capand;': '\u2a44',
+ 'capbrcup;': '\u2a49',
+ 'capcap;': '\u2a4b',
+ 'capcup;': '\u2a47',
+ 'capdot;': '\u2a40',
+ 'CapitalDifferentialD;': '\u2145',
+ 'caps;': '\u2229\ufe00',
+ 'caret;': '\u2041',
+ 'caron;': '\u02c7',
+ 'Cayleys;': '\u212d',
+ 'ccaps;': '\u2a4d',
+ 'Ccaron;': '\u010c',
+ 'ccaron;': '\u010d',
+ 'Ccedil': '\xc7',
+ 'ccedil': '\xe7',
+ 'Ccedil;': '\xc7',
+ 'ccedil;': '\xe7',
+ 'Ccirc;': '\u0108',
+ 'ccirc;': '\u0109',
+ 'Cconint;': '\u2230',
+ 'ccups;': '\u2a4c',
+ 'ccupssm;': '\u2a50',
+ 'Cdot;': '\u010a',
+ 'cdot;': '\u010b',
+ 'cedil': '\xb8',
+ 'cedil;': '\xb8',
+ 'Cedilla;': '\xb8',
+ 'cemptyv;': '\u29b2',
+ 'cent': '\xa2',
+ 'cent;': '\xa2',
+ 'CenterDot;': '\xb7',
+ 'centerdot;': '\xb7',
+ 'Cfr;': '\u212d',
+ 'cfr;': '\U0001d520',
+ 'CHcy;': '\u0427',
+ 'chcy;': '\u0447',
+ 'check;': '\u2713',
+ 'checkmark;': '\u2713',
+ 'Chi;': '\u03a7',
+ 'chi;': '\u03c7',
+ 'cir;': '\u25cb',
+ 'circ;': '\u02c6',
+ 'circeq;': '\u2257',
+ 'circlearrowleft;': '\u21ba',
+ 'circlearrowright;': '\u21bb',
+ 'circledast;': '\u229b',
+ 'circledcirc;': '\u229a',
+ 'circleddash;': '\u229d',
+ 'CircleDot;': '\u2299',
+ 'circledR;': '\xae',
+ 'circledS;': '\u24c8',
+ 'CircleMinus;': '\u2296',
+ 'CirclePlus;': '\u2295',
+ 'CircleTimes;': '\u2297',
+ 'cirE;': '\u29c3',
+ 'cire;': '\u2257',
+ 'cirfnint;': '\u2a10',
+ 'cirmid;': '\u2aef',
+ 'cirscir;': '\u29c2',
+ 'ClockwiseContourIntegral;': '\u2232',
+ 'CloseCurlyDoubleQuote;': '\u201d',
+ 'CloseCurlyQuote;': '\u2019',
+ 'clubs;': '\u2663',
+ 'clubsuit;': '\u2663',
+ 'Colon;': '\u2237',
+ 'colon;': ':',
+ 'Colone;': '\u2a74',
+ 'colone;': '\u2254',
+ 'coloneq;': '\u2254',
+ 'comma;': ',',
+ 'commat;': '@',
+ 'comp;': '\u2201',
+ 'compfn;': '\u2218',
+ 'complement;': '\u2201',
+ 'complexes;': '\u2102',
+ 'cong;': '\u2245',
+ 'congdot;': '\u2a6d',
+ 'Congruent;': '\u2261',
+ 'Conint;': '\u222f',
+ 'conint;': '\u222e',
+ 'ContourIntegral;': '\u222e',
+ 'Copf;': '\u2102',
+ 'copf;': '\U0001d554',
+ 'coprod;': '\u2210',
+ 'Coproduct;': '\u2210',
+ 'COPY': '\xa9',
+ 'copy': '\xa9',
+ 'COPY;': '\xa9',
+ 'copy;': '\xa9',
+ 'copysr;': '\u2117',
+ 'CounterClockwiseContourIntegral;': '\u2233',
+ 'crarr;': '\u21b5',
+ 'Cross;': '\u2a2f',
+ 'cross;': '\u2717',
+ 'Cscr;': '\U0001d49e',
+ 'cscr;': '\U0001d4b8',
+ 'csub;': '\u2acf',
+ 'csube;': '\u2ad1',
+ 'csup;': '\u2ad0',
+ 'csupe;': '\u2ad2',
+ 'ctdot;': '\u22ef',
+ 'cudarrl;': '\u2938',
+ 'cudarrr;': '\u2935',
+ 'cuepr;': '\u22de',
+ 'cuesc;': '\u22df',
+ 'cularr;': '\u21b6',
+ 'cularrp;': '\u293d',
+ 'Cup;': '\u22d3',
+ 'cup;': '\u222a',
+ 'cupbrcap;': '\u2a48',
+ 'CupCap;': '\u224d',
+ 'cupcap;': '\u2a46',
+ 'cupcup;': '\u2a4a',
+ 'cupdot;': '\u228d',
+ 'cupor;': '\u2a45',
+ 'cups;': '\u222a\ufe00',
+ 'curarr;': '\u21b7',
+ 'curarrm;': '\u293c',
+ 'curlyeqprec;': '\u22de',
+ 'curlyeqsucc;': '\u22df',
+ 'curlyvee;': '\u22ce',
+ 'curlywedge;': '\u22cf',
+ 'curren': '\xa4',
+ 'curren;': '\xa4',
+ 'curvearrowleft;': '\u21b6',
+ 'curvearrowright;': '\u21b7',
+ 'cuvee;': '\u22ce',
+ 'cuwed;': '\u22cf',
+ 'cwconint;': '\u2232',
+ 'cwint;': '\u2231',
+ 'cylcty;': '\u232d',
+ 'Dagger;': '\u2021',
+ 'dagger;': '\u2020',
+ 'daleth;': '\u2138',
+ 'Darr;': '\u21a1',
+ 'dArr;': '\u21d3',
+ 'darr;': '\u2193',
+ 'dash;': '\u2010',
+ 'Dashv;': '\u2ae4',
+ 'dashv;': '\u22a3',
+ 'dbkarow;': '\u290f',
+ 'dblac;': '\u02dd',
+ 'Dcaron;': '\u010e',
+ 'dcaron;': '\u010f',
+ 'Dcy;': '\u0414',
+ 'dcy;': '\u0434',
+ 'DD;': '\u2145',
+ 'dd;': '\u2146',
+ 'ddagger;': '\u2021',
+ 'ddarr;': '\u21ca',
+ 'DDotrahd;': '\u2911',
+ 'ddotseq;': '\u2a77',
+ 'deg': '\xb0',
+ 'deg;': '\xb0',
+ 'Del;': '\u2207',
+ 'Delta;': '\u0394',
+ 'delta;': '\u03b4',
+ 'demptyv;': '\u29b1',
+ 'dfisht;': '\u297f',
+ 'Dfr;': '\U0001d507',
+ 'dfr;': '\U0001d521',
+ 'dHar;': '\u2965',
+ 'dharl;': '\u21c3',
+ 'dharr;': '\u21c2',
+ 'DiacriticalAcute;': '\xb4',
+ 'DiacriticalDot;': '\u02d9',
+ 'DiacriticalDoubleAcute;': '\u02dd',
+ 'DiacriticalGrave;': '`',
+ 'DiacriticalTilde;': '\u02dc',
+ 'diam;': '\u22c4',
+ 'Diamond;': '\u22c4',
+ 'diamond;': '\u22c4',
+ 'diamondsuit;': '\u2666',
+ 'diams;': '\u2666',
+ 'die;': '\xa8',
+ 'DifferentialD;': '\u2146',
+ 'digamma;': '\u03dd',
+ 'disin;': '\u22f2',
+ 'div;': '\xf7',
+ 'divide': '\xf7',
+ 'divide;': '\xf7',
+ 'divideontimes;': '\u22c7',
+ 'divonx;': '\u22c7',
+ 'DJcy;': '\u0402',
+ 'djcy;': '\u0452',
+ 'dlcorn;': '\u231e',
+ 'dlcrop;': '\u230d',
+ 'dollar;': '$',
+ 'Dopf;': '\U0001d53b',
+ 'dopf;': '\U0001d555',
+ 'Dot;': '\xa8',
+ 'dot;': '\u02d9',
+ 'DotDot;': '\u20dc',
+ 'doteq;': '\u2250',
+ 'doteqdot;': '\u2251',
+ 'DotEqual;': '\u2250',
+ 'dotminus;': '\u2238',
+ 'dotplus;': '\u2214',
+ 'dotsquare;': '\u22a1',
+ 'doublebarwedge;': '\u2306',
+ 'DoubleContourIntegral;': '\u222f',
+ 'DoubleDot;': '\xa8',
+ 'DoubleDownArrow;': '\u21d3',
+ 'DoubleLeftArrow;': '\u21d0',
+ 'DoubleLeftRightArrow;': '\u21d4',
+ 'DoubleLeftTee;': '\u2ae4',
+ 'DoubleLongLeftArrow;': '\u27f8',
+ 'DoubleLongLeftRightArrow;': '\u27fa',
+ 'DoubleLongRightArrow;': '\u27f9',
+ 'DoubleRightArrow;': '\u21d2',
+ 'DoubleRightTee;': '\u22a8',
+ 'DoubleUpArrow;': '\u21d1',
+ 'DoubleUpDownArrow;': '\u21d5',
+ 'DoubleVerticalBar;': '\u2225',
+ 'DownArrow;': '\u2193',
+ 'Downarrow;': '\u21d3',
+ 'downarrow;': '\u2193',
+ 'DownArrowBar;': '\u2913',
+ 'DownArrowUpArrow;': '\u21f5',
+ 'DownBreve;': '\u0311',
+ 'downdownarrows;': '\u21ca',
+ 'downharpoonleft;': '\u21c3',
+ 'downharpoonright;': '\u21c2',
+ 'DownLeftRightVector;': '\u2950',
+ 'DownLeftTeeVector;': '\u295e',
+ 'DownLeftVector;': '\u21bd',
+ 'DownLeftVectorBar;': '\u2956',
+ 'DownRightTeeVector;': '\u295f',
+ 'DownRightVector;': '\u21c1',
+ 'DownRightVectorBar;': '\u2957',
+ 'DownTee;': '\u22a4',
+ 'DownTeeArrow;': '\u21a7',
+ 'drbkarow;': '\u2910',
+ 'drcorn;': '\u231f',
+ 'drcrop;': '\u230c',
+ 'Dscr;': '\U0001d49f',
+ 'dscr;': '\U0001d4b9',
+ 'DScy;': '\u0405',
+ 'dscy;': '\u0455',
+ 'dsol;': '\u29f6',
+ 'Dstrok;': '\u0110',
+ 'dstrok;': '\u0111',
+ 'dtdot;': '\u22f1',
+ 'dtri;': '\u25bf',
+ 'dtrif;': '\u25be',
+ 'duarr;': '\u21f5',
+ 'duhar;': '\u296f',
+ 'dwangle;': '\u29a6',
+ 'DZcy;': '\u040f',
+ 'dzcy;': '\u045f',
+ 'dzigrarr;': '\u27ff',
+ 'Eacute': '\xc9',
+ 'eacute': '\xe9',
+ 'Eacute;': '\xc9',
+ 'eacute;': '\xe9',
+ 'easter;': '\u2a6e',
+ 'Ecaron;': '\u011a',
+ 'ecaron;': '\u011b',
+ 'ecir;': '\u2256',
+ 'Ecirc': '\xca',
+ 'ecirc': '\xea',
+ 'Ecirc;': '\xca',
+ 'ecirc;': '\xea',
+ 'ecolon;': '\u2255',
+ 'Ecy;': '\u042d',
+ 'ecy;': '\u044d',
+ 'eDDot;': '\u2a77',
+ 'Edot;': '\u0116',
+ 'eDot;': '\u2251',
+ 'edot;': '\u0117',
+ 'ee;': '\u2147',
+ 'efDot;': '\u2252',
+ 'Efr;': '\U0001d508',
+ 'efr;': '\U0001d522',
+ 'eg;': '\u2a9a',
+ 'Egrave': '\xc8',
+ 'egrave': '\xe8',
+ 'Egrave;': '\xc8',
+ 'egrave;': '\xe8',
+ 'egs;': '\u2a96',
+ 'egsdot;': '\u2a98',
+ 'el;': '\u2a99',
+ 'Element;': '\u2208',
+ 'elinters;': '\u23e7',
+ 'ell;': '\u2113',
+ 'els;': '\u2a95',
+ 'elsdot;': '\u2a97',
+ 'Emacr;': '\u0112',
+ 'emacr;': '\u0113',
+ 'empty;': '\u2205',
+ 'emptyset;': '\u2205',
+ 'EmptySmallSquare;': '\u25fb',
+ 'emptyv;': '\u2205',
+ 'EmptyVerySmallSquare;': '\u25ab',
+ 'emsp13;': '\u2004',
+ 'emsp14;': '\u2005',
+ 'emsp;': '\u2003',
+ 'ENG;': '\u014a',
+ 'eng;': '\u014b',
+ 'ensp;': '\u2002',
+ 'Eogon;': '\u0118',
+ 'eogon;': '\u0119',
+ 'Eopf;': '\U0001d53c',
+ 'eopf;': '\U0001d556',
+ 'epar;': '\u22d5',
+ 'eparsl;': '\u29e3',
+ 'eplus;': '\u2a71',
+ 'epsi;': '\u03b5',
+ 'Epsilon;': '\u0395',
+ 'epsilon;': '\u03b5',
+ 'epsiv;': '\u03f5',
+ 'eqcirc;': '\u2256',
+ 'eqcolon;': '\u2255',
+ 'eqsim;': '\u2242',
+ 'eqslantgtr;': '\u2a96',
+ 'eqslantless;': '\u2a95',
+ 'Equal;': '\u2a75',
+ 'equals;': '=',
+ 'EqualTilde;': '\u2242',
+ 'equest;': '\u225f',
+ 'Equilibrium;': '\u21cc',
+ 'equiv;': '\u2261',
+ 'equivDD;': '\u2a78',
+ 'eqvparsl;': '\u29e5',
+ 'erarr;': '\u2971',
+ 'erDot;': '\u2253',
+ 'Escr;': '\u2130',
+ 'escr;': '\u212f',
+ 'esdot;': '\u2250',
+ 'Esim;': '\u2a73',
+ 'esim;': '\u2242',
+ 'Eta;': '\u0397',
+ 'eta;': '\u03b7',
+ 'ETH': '\xd0',
+ 'eth': '\xf0',
+ 'ETH;': '\xd0',
+ 'eth;': '\xf0',
+ 'Euml': '\xcb',
+ 'euml': '\xeb',
+ 'Euml;': '\xcb',
+ 'euml;': '\xeb',
+ 'euro;': '\u20ac',
+ 'excl;': '!',
+ 'exist;': '\u2203',
+ 'Exists;': '\u2203',
+ 'expectation;': '\u2130',
+ 'ExponentialE;': '\u2147',
+ 'exponentiale;': '\u2147',
+ 'fallingdotseq;': '\u2252',
+ 'Fcy;': '\u0424',
+ 'fcy;': '\u0444',
+ 'female;': '\u2640',
+ 'ffilig;': '\ufb03',
+ 'fflig;': '\ufb00',
+ 'ffllig;': '\ufb04',
+ 'Ffr;': '\U0001d509',
+ 'ffr;': '\U0001d523',
+ 'filig;': '\ufb01',
+ 'FilledSmallSquare;': '\u25fc',
+ 'FilledVerySmallSquare;': '\u25aa',
+ 'fjlig;': 'fj',
+ 'flat;': '\u266d',
+ 'fllig;': '\ufb02',
+ 'fltns;': '\u25b1',
+ 'fnof;': '\u0192',
+ 'Fopf;': '\U0001d53d',
+ 'fopf;': '\U0001d557',
+ 'ForAll;': '\u2200',
+ 'forall;': '\u2200',
+ 'fork;': '\u22d4',
+ 'forkv;': '\u2ad9',
+ 'Fouriertrf;': '\u2131',
+ 'fpartint;': '\u2a0d',
+ 'frac12': '\xbd',
+ 'frac12;': '\xbd',
+ 'frac13;': '\u2153',
+ 'frac14': '\xbc',
+ 'frac14;': '\xbc',
+ 'frac15;': '\u2155',
+ 'frac16;': '\u2159',
+ 'frac18;': '\u215b',
+ 'frac23;': '\u2154',
+ 'frac25;': '\u2156',
+ 'frac34': '\xbe',
+ 'frac34;': '\xbe',
+ 'frac35;': '\u2157',
+ 'frac38;': '\u215c',
+ 'frac45;': '\u2158',
+ 'frac56;': '\u215a',
+ 'frac58;': '\u215d',
+ 'frac78;': '\u215e',
+ 'frasl;': '\u2044',
+ 'frown;': '\u2322',
+ 'Fscr;': '\u2131',
+ 'fscr;': '\U0001d4bb',
+ 'gacute;': '\u01f5',
+ 'Gamma;': '\u0393',
+ 'gamma;': '\u03b3',
+ 'Gammad;': '\u03dc',
+ 'gammad;': '\u03dd',
+ 'gap;': '\u2a86',
+ 'Gbreve;': '\u011e',
+ 'gbreve;': '\u011f',
+ 'Gcedil;': '\u0122',
+ 'Gcirc;': '\u011c',
+ 'gcirc;': '\u011d',
+ 'Gcy;': '\u0413',
+ 'gcy;': '\u0433',
+ 'Gdot;': '\u0120',
+ 'gdot;': '\u0121',
+ 'gE;': '\u2267',
+ 'ge;': '\u2265',
+ 'gEl;': '\u2a8c',
+ 'gel;': '\u22db',
+ 'geq;': '\u2265',
+ 'geqq;': '\u2267',
+ 'geqslant;': '\u2a7e',
+ 'ges;': '\u2a7e',
+ 'gescc;': '\u2aa9',
+ 'gesdot;': '\u2a80',
+ 'gesdoto;': '\u2a82',
+ 'gesdotol;': '\u2a84',
+ 'gesl;': '\u22db\ufe00',
+ 'gesles;': '\u2a94',
+ 'Gfr;': '\U0001d50a',
+ 'gfr;': '\U0001d524',
+ 'Gg;': '\u22d9',
+ 'gg;': '\u226b',
+ 'ggg;': '\u22d9',
+ 'gimel;': '\u2137',
+ 'GJcy;': '\u0403',
+ 'gjcy;': '\u0453',
+ 'gl;': '\u2277',
+ 'gla;': '\u2aa5',
+ 'glE;': '\u2a92',
+ 'glj;': '\u2aa4',
+ 'gnap;': '\u2a8a',
+ 'gnapprox;': '\u2a8a',
+ 'gnE;': '\u2269',
+ 'gne;': '\u2a88',
+ 'gneq;': '\u2a88',
+ 'gneqq;': '\u2269',
+ 'gnsim;': '\u22e7',
+ 'Gopf;': '\U0001d53e',
+ 'gopf;': '\U0001d558',
+ 'grave;': '`',
+ 'GreaterEqual;': '\u2265',
+ 'GreaterEqualLess;': '\u22db',
+ 'GreaterFullEqual;': '\u2267',
+ 'GreaterGreater;': '\u2aa2',
+ 'GreaterLess;': '\u2277',
+ 'GreaterSlantEqual;': '\u2a7e',
+ 'GreaterTilde;': '\u2273',
+ 'Gscr;': '\U0001d4a2',
+ 'gscr;': '\u210a',
+ 'gsim;': '\u2273',
+ 'gsime;': '\u2a8e',
+ 'gsiml;': '\u2a90',
+ 'GT': '>',
+ 'gt': '>',
+ 'GT;': '>',
+ 'Gt;': '\u226b',
+ 'gt;': '>',
+ 'gtcc;': '\u2aa7',
+ 'gtcir;': '\u2a7a',
+ 'gtdot;': '\u22d7',
+ 'gtlPar;': '\u2995',
+ 'gtquest;': '\u2a7c',
+ 'gtrapprox;': '\u2a86',
+ 'gtrarr;': '\u2978',
+ 'gtrdot;': '\u22d7',
+ 'gtreqless;': '\u22db',
+ 'gtreqqless;': '\u2a8c',
+ 'gtrless;': '\u2277',
+ 'gtrsim;': '\u2273',
+ 'gvertneqq;': '\u2269\ufe00',
+ 'gvnE;': '\u2269\ufe00',
+ 'Hacek;': '\u02c7',
+ 'hairsp;': '\u200a',
+ 'half;': '\xbd',
+ 'hamilt;': '\u210b',
+ 'HARDcy;': '\u042a',
+ 'hardcy;': '\u044a',
+ 'hArr;': '\u21d4',
+ 'harr;': '\u2194',
+ 'harrcir;': '\u2948',
+ 'harrw;': '\u21ad',
+ 'Hat;': '^',
+ 'hbar;': '\u210f',
+ 'Hcirc;': '\u0124',
+ 'hcirc;': '\u0125',
+ 'hearts;': '\u2665',
+ 'heartsuit;': '\u2665',
+ 'hellip;': '\u2026',
+ 'hercon;': '\u22b9',
+ 'Hfr;': '\u210c',
+ 'hfr;': '\U0001d525',
+ 'HilbertSpace;': '\u210b',
+ 'hksearow;': '\u2925',
+ 'hkswarow;': '\u2926',
+ 'hoarr;': '\u21ff',
+ 'homtht;': '\u223b',
+ 'hookleftarrow;': '\u21a9',
+ 'hookrightarrow;': '\u21aa',
+ 'Hopf;': '\u210d',
+ 'hopf;': '\U0001d559',
+ 'horbar;': '\u2015',
+ 'HorizontalLine;': '\u2500',
+ 'Hscr;': '\u210b',
+ 'hscr;': '\U0001d4bd',
+ 'hslash;': '\u210f',
+ 'Hstrok;': '\u0126',
+ 'hstrok;': '\u0127',
+ 'HumpDownHump;': '\u224e',
+ 'HumpEqual;': '\u224f',
+ 'hybull;': '\u2043',
+ 'hyphen;': '\u2010',
+ 'Iacute': '\xcd',
+ 'iacute': '\xed',
+ 'Iacute;': '\xcd',
+ 'iacute;': '\xed',
+ 'ic;': '\u2063',
+ 'Icirc': '\xce',
+ 'icirc': '\xee',
+ 'Icirc;': '\xce',
+ 'icirc;': '\xee',
+ 'Icy;': '\u0418',
+ 'icy;': '\u0438',
+ 'Idot;': '\u0130',
+ 'IEcy;': '\u0415',
+ 'iecy;': '\u0435',
+ 'iexcl': '\xa1',
+ 'iexcl;': '\xa1',
+ 'iff;': '\u21d4',
+ 'Ifr;': '\u2111',
+ 'ifr;': '\U0001d526',
+ 'Igrave': '\xcc',
+ 'igrave': '\xec',
+ 'Igrave;': '\xcc',
+ 'igrave;': '\xec',
+ 'ii;': '\u2148',
+ 'iiiint;': '\u2a0c',
+ 'iiint;': '\u222d',
+ 'iinfin;': '\u29dc',
+ 'iiota;': '\u2129',
+ 'IJlig;': '\u0132',
+ 'ijlig;': '\u0133',
+ 'Im;': '\u2111',
+ 'Imacr;': '\u012a',
+ 'imacr;': '\u012b',
+ 'image;': '\u2111',
+ 'ImaginaryI;': '\u2148',
+ 'imagline;': '\u2110',
+ 'imagpart;': '\u2111',
+ 'imath;': '\u0131',
+ 'imof;': '\u22b7',
+ 'imped;': '\u01b5',
+ 'Implies;': '\u21d2',
+ 'in;': '\u2208',
+ 'incare;': '\u2105',
+ 'infin;': '\u221e',
+ 'infintie;': '\u29dd',
+ 'inodot;': '\u0131',
+ 'Int;': '\u222c',
+ 'int;': '\u222b',
+ 'intcal;': '\u22ba',
+ 'integers;': '\u2124',
+ 'Integral;': '\u222b',
+ 'intercal;': '\u22ba',
+ 'Intersection;': '\u22c2',
+ 'intlarhk;': '\u2a17',
+ 'intprod;': '\u2a3c',
+ 'InvisibleComma;': '\u2063',
+ 'InvisibleTimes;': '\u2062',
+ 'IOcy;': '\u0401',
+ 'iocy;': '\u0451',
+ 'Iogon;': '\u012e',
+ 'iogon;': '\u012f',
+ 'Iopf;': '\U0001d540',
+ 'iopf;': '\U0001d55a',
+ 'Iota;': '\u0399',
+ 'iota;': '\u03b9',
+ 'iprod;': '\u2a3c',
+ 'iquest': '\xbf',
+ 'iquest;': '\xbf',
+ 'Iscr;': '\u2110',
+ 'iscr;': '\U0001d4be',
+ 'isin;': '\u2208',
+ 'isindot;': '\u22f5',
+ 'isinE;': '\u22f9',
+ 'isins;': '\u22f4',
+ 'isinsv;': '\u22f3',
+ 'isinv;': '\u2208',
+ 'it;': '\u2062',
+ 'Itilde;': '\u0128',
+ 'itilde;': '\u0129',
+ 'Iukcy;': '\u0406',
+ 'iukcy;': '\u0456',
+ 'Iuml': '\xcf',
+ 'iuml': '\xef',
+ 'Iuml;': '\xcf',
+ 'iuml;': '\xef',
+ 'Jcirc;': '\u0134',
+ 'jcirc;': '\u0135',
+ 'Jcy;': '\u0419',
+ 'jcy;': '\u0439',
+ 'Jfr;': '\U0001d50d',
+ 'jfr;': '\U0001d527',
+ 'jmath;': '\u0237',
+ 'Jopf;': '\U0001d541',
+ 'jopf;': '\U0001d55b',
+ 'Jscr;': '\U0001d4a5',
+ 'jscr;': '\U0001d4bf',
+ 'Jsercy;': '\u0408',
+ 'jsercy;': '\u0458',
+ 'Jukcy;': '\u0404',
+ 'jukcy;': '\u0454',
+ 'Kappa;': '\u039a',
+ 'kappa;': '\u03ba',
+ 'kappav;': '\u03f0',
+ 'Kcedil;': '\u0136',
+ 'kcedil;': '\u0137',
+ 'Kcy;': '\u041a',
+ 'kcy;': '\u043a',
+ 'Kfr;': '\U0001d50e',
+ 'kfr;': '\U0001d528',
+ 'kgreen;': '\u0138',
+ 'KHcy;': '\u0425',
+ 'khcy;': '\u0445',
+ 'KJcy;': '\u040c',
+ 'kjcy;': '\u045c',
+ 'Kopf;': '\U0001d542',
+ 'kopf;': '\U0001d55c',
+ 'Kscr;': '\U0001d4a6',
+ 'kscr;': '\U0001d4c0',
+ 'lAarr;': '\u21da',
+ 'Lacute;': '\u0139',
+ 'lacute;': '\u013a',
+ 'laemptyv;': '\u29b4',
+ 'lagran;': '\u2112',
+ 'Lambda;': '\u039b',
+ 'lambda;': '\u03bb',
+ 'Lang;': '\u27ea',
+ 'lang;': '\u27e8',
+ 'langd;': '\u2991',
+ 'langle;': '\u27e8',
+ 'lap;': '\u2a85',
+ 'Laplacetrf;': '\u2112',
+ 'laquo': '\xab',
+ 'laquo;': '\xab',
+ 'Larr;': '\u219e',
+ 'lArr;': '\u21d0',
+ 'larr;': '\u2190',
+ 'larrb;': '\u21e4',
+ 'larrbfs;': '\u291f',
+ 'larrfs;': '\u291d',
+ 'larrhk;': '\u21a9',
+ 'larrlp;': '\u21ab',
+ 'larrpl;': '\u2939',
+ 'larrsim;': '\u2973',
+ 'larrtl;': '\u21a2',
+ 'lat;': '\u2aab',
+ 'lAtail;': '\u291b',
+ 'latail;': '\u2919',
+ 'late;': '\u2aad',
+ 'lates;': '\u2aad\ufe00',
+ 'lBarr;': '\u290e',
+ 'lbarr;': '\u290c',
+ 'lbbrk;': '\u2772',
+ 'lbrace;': '{',
+ 'lbrack;': '[',
+ 'lbrke;': '\u298b',
+ 'lbrksld;': '\u298f',
+ 'lbrkslu;': '\u298d',
+ 'Lcaron;': '\u013d',
+ 'lcaron;': '\u013e',
+ 'Lcedil;': '\u013b',
+ 'lcedil;': '\u013c',
+ 'lceil;': '\u2308',
+ 'lcub;': '{',
+ 'Lcy;': '\u041b',
+ 'lcy;': '\u043b',
+ 'ldca;': '\u2936',
+ 'ldquo;': '\u201c',
+ 'ldquor;': '\u201e',
+ 'ldrdhar;': '\u2967',
+ 'ldrushar;': '\u294b',
+ 'ldsh;': '\u21b2',
+ 'lE;': '\u2266',
+ 'le;': '\u2264',
+ 'LeftAngleBracket;': '\u27e8',
+ 'LeftArrow;': '\u2190',
+ 'Leftarrow;': '\u21d0',
+ 'leftarrow;': '\u2190',
+ 'LeftArrowBar;': '\u21e4',
+ 'LeftArrowRightArrow;': '\u21c6',
+ 'leftarrowtail;': '\u21a2',
+ 'LeftCeiling;': '\u2308',
+ 'LeftDoubleBracket;': '\u27e6',
+ 'LeftDownTeeVector;': '\u2961',
+ 'LeftDownVector;': '\u21c3',
+ 'LeftDownVectorBar;': '\u2959',
+ 'LeftFloor;': '\u230a',
+ 'leftharpoondown;': '\u21bd',
+ 'leftharpoonup;': '\u21bc',
+ 'leftleftarrows;': '\u21c7',
+ 'LeftRightArrow;': '\u2194',
+ 'Leftrightarrow;': '\u21d4',
+ 'leftrightarrow;': '\u2194',
+ 'leftrightarrows;': '\u21c6',
+ 'leftrightharpoons;': '\u21cb',
+ 'leftrightsquigarrow;': '\u21ad',
+ 'LeftRightVector;': '\u294e',
+ 'LeftTee;': '\u22a3',
+ 'LeftTeeArrow;': '\u21a4',
+ 'LeftTeeVector;': '\u295a',
+ 'leftthreetimes;': '\u22cb',
+ 'LeftTriangle;': '\u22b2',
+ 'LeftTriangleBar;': '\u29cf',
+ 'LeftTriangleEqual;': '\u22b4',
+ 'LeftUpDownVector;': '\u2951',
+ 'LeftUpTeeVector;': '\u2960',
+ 'LeftUpVector;': '\u21bf',
+ 'LeftUpVectorBar;': '\u2958',
+ 'LeftVector;': '\u21bc',
+ 'LeftVectorBar;': '\u2952',
+ 'lEg;': '\u2a8b',
+ 'leg;': '\u22da',
+ 'leq;': '\u2264',
+ 'leqq;': '\u2266',
+ 'leqslant;': '\u2a7d',
+ 'les;': '\u2a7d',
+ 'lescc;': '\u2aa8',
+ 'lesdot;': '\u2a7f',
+ 'lesdoto;': '\u2a81',
+ 'lesdotor;': '\u2a83',
+ 'lesg;': '\u22da\ufe00',
+ 'lesges;': '\u2a93',
+ 'lessapprox;': '\u2a85',
+ 'lessdot;': '\u22d6',
+ 'lesseqgtr;': '\u22da',
+ 'lesseqqgtr;': '\u2a8b',
+ 'LessEqualGreater;': '\u22da',
+ 'LessFullEqual;': '\u2266',
+ 'LessGreater;': '\u2276',
+ 'lessgtr;': '\u2276',
+ 'LessLess;': '\u2aa1',
+ 'lesssim;': '\u2272',
+ 'LessSlantEqual;': '\u2a7d',
+ 'LessTilde;': '\u2272',
+ 'lfisht;': '\u297c',
+ 'lfloor;': '\u230a',
+ 'Lfr;': '\U0001d50f',
+ 'lfr;': '\U0001d529',
+ 'lg;': '\u2276',
+ 'lgE;': '\u2a91',
+ 'lHar;': '\u2962',
+ 'lhard;': '\u21bd',
+ 'lharu;': '\u21bc',
+ 'lharul;': '\u296a',
+ 'lhblk;': '\u2584',
+ 'LJcy;': '\u0409',
+ 'ljcy;': '\u0459',
+ 'Ll;': '\u22d8',
+ 'll;': '\u226a',
+ 'llarr;': '\u21c7',
+ 'llcorner;': '\u231e',
+ 'Lleftarrow;': '\u21da',
+ 'llhard;': '\u296b',
+ 'lltri;': '\u25fa',
+ 'Lmidot;': '\u013f',
+ 'lmidot;': '\u0140',
+ 'lmoust;': '\u23b0',
+ 'lmoustache;': '\u23b0',
+ 'lnap;': '\u2a89',
+ 'lnapprox;': '\u2a89',
+ 'lnE;': '\u2268',
+ 'lne;': '\u2a87',
+ 'lneq;': '\u2a87',
+ 'lneqq;': '\u2268',
+ 'lnsim;': '\u22e6',
+ 'loang;': '\u27ec',
+ 'loarr;': '\u21fd',
+ 'lobrk;': '\u27e6',
+ 'LongLeftArrow;': '\u27f5',
+ 'Longleftarrow;': '\u27f8',
+ 'longleftarrow;': '\u27f5',
+ 'LongLeftRightArrow;': '\u27f7',
+ 'Longleftrightarrow;': '\u27fa',
+ 'longleftrightarrow;': '\u27f7',
+ 'longmapsto;': '\u27fc',
+ 'LongRightArrow;': '\u27f6',
+ 'Longrightarrow;': '\u27f9',
+ 'longrightarrow;': '\u27f6',
+ 'looparrowleft;': '\u21ab',
+ 'looparrowright;': '\u21ac',
+ 'lopar;': '\u2985',
+ 'Lopf;': '\U0001d543',
+ 'lopf;': '\U0001d55d',
+ 'loplus;': '\u2a2d',
+ 'lotimes;': '\u2a34',
+ 'lowast;': '\u2217',
+ 'lowbar;': '_',
+ 'LowerLeftArrow;': '\u2199',
+ 'LowerRightArrow;': '\u2198',
+ 'loz;': '\u25ca',
+ 'lozenge;': '\u25ca',
+ 'lozf;': '\u29eb',
+ 'lpar;': '(',
+ 'lparlt;': '\u2993',
+ 'lrarr;': '\u21c6',
+ 'lrcorner;': '\u231f',
+ 'lrhar;': '\u21cb',
+ 'lrhard;': '\u296d',
+ 'lrm;': '\u200e',
+ 'lrtri;': '\u22bf',
+ 'lsaquo;': '\u2039',
+ 'Lscr;': '\u2112',
+ 'lscr;': '\U0001d4c1',
+ 'Lsh;': '\u21b0',
+ 'lsh;': '\u21b0',
+ 'lsim;': '\u2272',
+ 'lsime;': '\u2a8d',
+ 'lsimg;': '\u2a8f',
+ 'lsqb;': '[',
+ 'lsquo;': '\u2018',
+ 'lsquor;': '\u201a',
+ 'Lstrok;': '\u0141',
+ 'lstrok;': '\u0142',
+ 'LT': '<',
+ 'lt': '<',
+ 'LT;': '<',
+ 'Lt;': '\u226a',
+ 'lt;': '<',
+ 'ltcc;': '\u2aa6',
+ 'ltcir;': '\u2a79',
+ 'ltdot;': '\u22d6',
+ 'lthree;': '\u22cb',
+ 'ltimes;': '\u22c9',
+ 'ltlarr;': '\u2976',
+ 'ltquest;': '\u2a7b',
+ 'ltri;': '\u25c3',
+ 'ltrie;': '\u22b4',
+ 'ltrif;': '\u25c2',
+ 'ltrPar;': '\u2996',
+ 'lurdshar;': '\u294a',
+ 'luruhar;': '\u2966',
+ 'lvertneqq;': '\u2268\ufe00',
+ 'lvnE;': '\u2268\ufe00',
+ 'macr': '\xaf',
+ 'macr;': '\xaf',
+ 'male;': '\u2642',
+ 'malt;': '\u2720',
+ 'maltese;': '\u2720',
+ 'Map;': '\u2905',
+ 'map;': '\u21a6',
+ 'mapsto;': '\u21a6',
+ 'mapstodown;': '\u21a7',
+ 'mapstoleft;': '\u21a4',
+ 'mapstoup;': '\u21a5',
+ 'marker;': '\u25ae',
+ 'mcomma;': '\u2a29',
+ 'Mcy;': '\u041c',
+ 'mcy;': '\u043c',
+ 'mdash;': '\u2014',
+ 'mDDot;': '\u223a',
+ 'measuredangle;': '\u2221',
+ 'MediumSpace;': '\u205f',
+ 'Mellintrf;': '\u2133',
+ 'Mfr;': '\U0001d510',
+ 'mfr;': '\U0001d52a',
+ 'mho;': '\u2127',
+ 'micro': '\xb5',
+ 'micro;': '\xb5',
+ 'mid;': '\u2223',
+ 'midast;': '*',
+ 'midcir;': '\u2af0',
+ 'middot': '\xb7',
+ 'middot;': '\xb7',
+ 'minus;': '\u2212',
+ 'minusb;': '\u229f',
+ 'minusd;': '\u2238',
+ 'minusdu;': '\u2a2a',
+ 'MinusPlus;': '\u2213',
+ 'mlcp;': '\u2adb',
+ 'mldr;': '\u2026',
+ 'mnplus;': '\u2213',
+ 'models;': '\u22a7',
+ 'Mopf;': '\U0001d544',
+ 'mopf;': '\U0001d55e',
+ 'mp;': '\u2213',
+ 'Mscr;': '\u2133',
+ 'mscr;': '\U0001d4c2',
+ 'mstpos;': '\u223e',
+ 'Mu;': '\u039c',
+ 'mu;': '\u03bc',
+ 'multimap;': '\u22b8',
+ 'mumap;': '\u22b8',
+ 'nabla;': '\u2207',
+ 'Nacute;': '\u0143',
+ 'nacute;': '\u0144',
+ 'nang;': '\u2220\u20d2',
+ 'nap;': '\u2249',
+ 'napE;': '\u2a70\u0338',
+ 'napid;': '\u224b\u0338',
+ 'napos;': '\u0149',
+ 'napprox;': '\u2249',
+ 'natur;': '\u266e',
+ 'natural;': '\u266e',
+ 'naturals;': '\u2115',
+ 'nbsp': '\xa0',
+ 'nbsp;': '\xa0',
+ 'nbump;': '\u224e\u0338',
+ 'nbumpe;': '\u224f\u0338',
+ 'ncap;': '\u2a43',
+ 'Ncaron;': '\u0147',
+ 'ncaron;': '\u0148',
+ 'Ncedil;': '\u0145',
+ 'ncedil;': '\u0146',
+ 'ncong;': '\u2247',
+ 'ncongdot;': '\u2a6d\u0338',
+ 'ncup;': '\u2a42',
+ 'Ncy;': '\u041d',
+ 'ncy;': '\u043d',
+ 'ndash;': '\u2013',
+ 'ne;': '\u2260',
+ 'nearhk;': '\u2924',
+ 'neArr;': '\u21d7',
+ 'nearr;': '\u2197',
+ 'nearrow;': '\u2197',
+ 'nedot;': '\u2250\u0338',
+ 'NegativeMediumSpace;': '\u200b',
+ 'NegativeThickSpace;': '\u200b',
+ 'NegativeThinSpace;': '\u200b',
+ 'NegativeVeryThinSpace;': '\u200b',
+ 'nequiv;': '\u2262',
+ 'nesear;': '\u2928',
+ 'nesim;': '\u2242\u0338',
+ 'NestedGreaterGreater;': '\u226b',
+ 'NestedLessLess;': '\u226a',
+ 'NewLine;': '\n',
+ 'nexist;': '\u2204',
+ 'nexists;': '\u2204',
+ 'Nfr;': '\U0001d511',
+ 'nfr;': '\U0001d52b',
+ 'ngE;': '\u2267\u0338',
+ 'nge;': '\u2271',
+ 'ngeq;': '\u2271',
+ 'ngeqq;': '\u2267\u0338',
+ 'ngeqslant;': '\u2a7e\u0338',
+ 'nges;': '\u2a7e\u0338',
+ 'nGg;': '\u22d9\u0338',
+ 'ngsim;': '\u2275',
+ 'nGt;': '\u226b\u20d2',
+ 'ngt;': '\u226f',
+ 'ngtr;': '\u226f',
+ 'nGtv;': '\u226b\u0338',
+ 'nhArr;': '\u21ce',
+ 'nharr;': '\u21ae',
+ 'nhpar;': '\u2af2',
+ 'ni;': '\u220b',
+ 'nis;': '\u22fc',
+ 'nisd;': '\u22fa',
+ 'niv;': '\u220b',
+ 'NJcy;': '\u040a',
+ 'njcy;': '\u045a',
+ 'nlArr;': '\u21cd',
+ 'nlarr;': '\u219a',
+ 'nldr;': '\u2025',
+ 'nlE;': '\u2266\u0338',
+ 'nle;': '\u2270',
+ 'nLeftarrow;': '\u21cd',
+ 'nleftarrow;': '\u219a',
+ 'nLeftrightarrow;': '\u21ce',
+ 'nleftrightarrow;': '\u21ae',
+ 'nleq;': '\u2270',
+ 'nleqq;': '\u2266\u0338',
+ 'nleqslant;': '\u2a7d\u0338',
+ 'nles;': '\u2a7d\u0338',
+ 'nless;': '\u226e',
+ 'nLl;': '\u22d8\u0338',
+ 'nlsim;': '\u2274',
+ 'nLt;': '\u226a\u20d2',
+ 'nlt;': '\u226e',
+ 'nltri;': '\u22ea',
+ 'nltrie;': '\u22ec',
+ 'nLtv;': '\u226a\u0338',
+ 'nmid;': '\u2224',
+ 'NoBreak;': '\u2060',
+ 'NonBreakingSpace;': '\xa0',
+ 'Nopf;': '\u2115',
+ 'nopf;': '\U0001d55f',
+ 'not': '\xac',
+ 'Not;': '\u2aec',
+ 'not;': '\xac',
+ 'NotCongruent;': '\u2262',
+ 'NotCupCap;': '\u226d',
+ 'NotDoubleVerticalBar;': '\u2226',
+ 'NotElement;': '\u2209',
+ 'NotEqual;': '\u2260',
+ 'NotEqualTilde;': '\u2242\u0338',
+ 'NotExists;': '\u2204',
+ 'NotGreater;': '\u226f',
+ 'NotGreaterEqual;': '\u2271',
+ 'NotGreaterFullEqual;': '\u2267\u0338',
+ 'NotGreaterGreater;': '\u226b\u0338',
+ 'NotGreaterLess;': '\u2279',
+ 'NotGreaterSlantEqual;': '\u2a7e\u0338',
+ 'NotGreaterTilde;': '\u2275',
+ 'NotHumpDownHump;': '\u224e\u0338',
+ 'NotHumpEqual;': '\u224f\u0338',
+ 'notin;': '\u2209',
+ 'notindot;': '\u22f5\u0338',
+ 'notinE;': '\u22f9\u0338',
+ 'notinva;': '\u2209',
+ 'notinvb;': '\u22f7',
+ 'notinvc;': '\u22f6',
+ 'NotLeftTriangle;': '\u22ea',
+ 'NotLeftTriangleBar;': '\u29cf\u0338',
+ 'NotLeftTriangleEqual;': '\u22ec',
+ 'NotLess;': '\u226e',
+ 'NotLessEqual;': '\u2270',
+ 'NotLessGreater;': '\u2278',
+ 'NotLessLess;': '\u226a\u0338',
+ 'NotLessSlantEqual;': '\u2a7d\u0338',
+ 'NotLessTilde;': '\u2274',
+ 'NotNestedGreaterGreater;': '\u2aa2\u0338',
+ 'NotNestedLessLess;': '\u2aa1\u0338',
+ 'notni;': '\u220c',
+ 'notniva;': '\u220c',
+ 'notnivb;': '\u22fe',
+ 'notnivc;': '\u22fd',
+ 'NotPrecedes;': '\u2280',
+ 'NotPrecedesEqual;': '\u2aaf\u0338',
+ 'NotPrecedesSlantEqual;': '\u22e0',
+ 'NotReverseElement;': '\u220c',
+ 'NotRightTriangle;': '\u22eb',
+ 'NotRightTriangleBar;': '\u29d0\u0338',
+ 'NotRightTriangleEqual;': '\u22ed',
+ 'NotSquareSubset;': '\u228f\u0338',
+ 'NotSquareSubsetEqual;': '\u22e2',
+ 'NotSquareSuperset;': '\u2290\u0338',
+ 'NotSquareSupersetEqual;': '\u22e3',
+ 'NotSubset;': '\u2282\u20d2',
+ 'NotSubsetEqual;': '\u2288',
+ 'NotSucceeds;': '\u2281',
+ 'NotSucceedsEqual;': '\u2ab0\u0338',
+ 'NotSucceedsSlantEqual;': '\u22e1',
+ 'NotSucceedsTilde;': '\u227f\u0338',
+ 'NotSuperset;': '\u2283\u20d2',
+ 'NotSupersetEqual;': '\u2289',
+ 'NotTilde;': '\u2241',
+ 'NotTildeEqual;': '\u2244',
+ 'NotTildeFullEqual;': '\u2247',
+ 'NotTildeTilde;': '\u2249',
+ 'NotVerticalBar;': '\u2224',
+ 'npar;': '\u2226',
+ 'nparallel;': '\u2226',
+ 'nparsl;': '\u2afd\u20e5',
+ 'npart;': '\u2202\u0338',
+ 'npolint;': '\u2a14',
+ 'npr;': '\u2280',
+ 'nprcue;': '\u22e0',
+ 'npre;': '\u2aaf\u0338',
+ 'nprec;': '\u2280',
+ 'npreceq;': '\u2aaf\u0338',
+ 'nrArr;': '\u21cf',
+ 'nrarr;': '\u219b',
+ 'nrarrc;': '\u2933\u0338',
+ 'nrarrw;': '\u219d\u0338',
+ 'nRightarrow;': '\u21cf',
+ 'nrightarrow;': '\u219b',
+ 'nrtri;': '\u22eb',
+ 'nrtrie;': '\u22ed',
+ 'nsc;': '\u2281',
+ 'nsccue;': '\u22e1',
+ 'nsce;': '\u2ab0\u0338',
+ 'Nscr;': '\U0001d4a9',
+ 'nscr;': '\U0001d4c3',
+ 'nshortmid;': '\u2224',
+ 'nshortparallel;': '\u2226',
+ 'nsim;': '\u2241',
+ 'nsime;': '\u2244',
+ 'nsimeq;': '\u2244',
+ 'nsmid;': '\u2224',
+ 'nspar;': '\u2226',
+ 'nsqsube;': '\u22e2',
+ 'nsqsupe;': '\u22e3',
+ 'nsub;': '\u2284',
+ 'nsubE;': '\u2ac5\u0338',
+ 'nsube;': '\u2288',
+ 'nsubset;': '\u2282\u20d2',
+ 'nsubseteq;': '\u2288',
+ 'nsubseteqq;': '\u2ac5\u0338',
+ 'nsucc;': '\u2281',
+ 'nsucceq;': '\u2ab0\u0338',
+ 'nsup;': '\u2285',
+ 'nsupE;': '\u2ac6\u0338',
+ 'nsupe;': '\u2289',
+ 'nsupset;': '\u2283\u20d2',
+ 'nsupseteq;': '\u2289',
+ 'nsupseteqq;': '\u2ac6\u0338',
+ 'ntgl;': '\u2279',
+ 'Ntilde': '\xd1',
+ 'ntilde': '\xf1',
+ 'Ntilde;': '\xd1',
+ 'ntilde;': '\xf1',
+ 'ntlg;': '\u2278',
+ 'ntriangleleft;': '\u22ea',
+ 'ntrianglelefteq;': '\u22ec',
+ 'ntriangleright;': '\u22eb',
+ 'ntrianglerighteq;': '\u22ed',
+ 'Nu;': '\u039d',
+ 'nu;': '\u03bd',
+ 'num;': '#',
+ 'numero;': '\u2116',
+ 'numsp;': '\u2007',
+ 'nvap;': '\u224d\u20d2',
+ 'nVDash;': '\u22af',
+ 'nVdash;': '\u22ae',
+ 'nvDash;': '\u22ad',
+ 'nvdash;': '\u22ac',
+ 'nvge;': '\u2265\u20d2',
+ 'nvgt;': '>\u20d2',
+ 'nvHarr;': '\u2904',
+ 'nvinfin;': '\u29de',
+ 'nvlArr;': '\u2902',
+ 'nvle;': '\u2264\u20d2',
+ 'nvlt;': '<\u20d2',
+ 'nvltrie;': '\u22b4\u20d2',
+ 'nvrArr;': '\u2903',
+ 'nvrtrie;': '\u22b5\u20d2',
+ 'nvsim;': '\u223c\u20d2',
+ 'nwarhk;': '\u2923',
+ 'nwArr;': '\u21d6',
+ 'nwarr;': '\u2196',
+ 'nwarrow;': '\u2196',
+ 'nwnear;': '\u2927',
+ 'Oacute': '\xd3',
+ 'oacute': '\xf3',
+ 'Oacute;': '\xd3',
+ 'oacute;': '\xf3',
+ 'oast;': '\u229b',
+ 'ocir;': '\u229a',
+ 'Ocirc': '\xd4',
+ 'ocirc': '\xf4',
+ 'Ocirc;': '\xd4',
+ 'ocirc;': '\xf4',
+ 'Ocy;': '\u041e',
+ 'ocy;': '\u043e',
+ 'odash;': '\u229d',
+ 'Odblac;': '\u0150',
+ 'odblac;': '\u0151',
+ 'odiv;': '\u2a38',
+ 'odot;': '\u2299',
+ 'odsold;': '\u29bc',
+ 'OElig;': '\u0152',
+ 'oelig;': '\u0153',
+ 'ofcir;': '\u29bf',
+ 'Ofr;': '\U0001d512',
+ 'ofr;': '\U0001d52c',
+ 'ogon;': '\u02db',
+ 'Ograve': '\xd2',
+ 'ograve': '\xf2',
+ 'Ograve;': '\xd2',
+ 'ograve;': '\xf2',
+ 'ogt;': '\u29c1',
+ 'ohbar;': '\u29b5',
+ 'ohm;': '\u03a9',
+ 'oint;': '\u222e',
+ 'olarr;': '\u21ba',
+ 'olcir;': '\u29be',
+ 'olcross;': '\u29bb',
+ 'oline;': '\u203e',
+ 'olt;': '\u29c0',
+ 'Omacr;': '\u014c',
+ 'omacr;': '\u014d',
+ 'Omega;': '\u03a9',
+ 'omega;': '\u03c9',
+ 'Omicron;': '\u039f',
+ 'omicron;': '\u03bf',
+ 'omid;': '\u29b6',
+ 'ominus;': '\u2296',
+ 'Oopf;': '\U0001d546',
+ 'oopf;': '\U0001d560',
+ 'opar;': '\u29b7',
+ 'OpenCurlyDoubleQuote;': '\u201c',
+ 'OpenCurlyQuote;': '\u2018',
+ 'operp;': '\u29b9',
+ 'oplus;': '\u2295',
+ 'Or;': '\u2a54',
+ 'or;': '\u2228',
+ 'orarr;': '\u21bb',
+ 'ord;': '\u2a5d',
+ 'order;': '\u2134',
+ 'orderof;': '\u2134',
+ 'ordf': '\xaa',
+ 'ordf;': '\xaa',
+ 'ordm': '\xba',
+ 'ordm;': '\xba',
+ 'origof;': '\u22b6',
+ 'oror;': '\u2a56',
+ 'orslope;': '\u2a57',
+ 'orv;': '\u2a5b',
+ 'oS;': '\u24c8',
+ 'Oscr;': '\U0001d4aa',
+ 'oscr;': '\u2134',
+ 'Oslash': '\xd8',
+ 'oslash': '\xf8',
+ 'Oslash;': '\xd8',
+ 'oslash;': '\xf8',
+ 'osol;': '\u2298',
+ 'Otilde': '\xd5',
+ 'otilde': '\xf5',
+ 'Otilde;': '\xd5',
+ 'otilde;': '\xf5',
+ 'Otimes;': '\u2a37',
+ 'otimes;': '\u2297',
+ 'otimesas;': '\u2a36',
+ 'Ouml': '\xd6',
+ 'ouml': '\xf6',
+ 'Ouml;': '\xd6',
+ 'ouml;': '\xf6',
+ 'ovbar;': '\u233d',
+ 'OverBar;': '\u203e',
+ 'OverBrace;': '\u23de',
+ 'OverBracket;': '\u23b4',
+ 'OverParenthesis;': '\u23dc',
+ 'par;': '\u2225',
+ 'para': '\xb6',
+ 'para;': '\xb6',
+ 'parallel;': '\u2225',
+ 'parsim;': '\u2af3',
+ 'parsl;': '\u2afd',
+ 'part;': '\u2202',
+ 'PartialD;': '\u2202',
+ 'Pcy;': '\u041f',
+ 'pcy;': '\u043f',
+ 'percnt;': '%',
+ 'period;': '.',
+ 'permil;': '\u2030',
+ 'perp;': '\u22a5',
+ 'pertenk;': '\u2031',
+ 'Pfr;': '\U0001d513',
+ 'pfr;': '\U0001d52d',
+ 'Phi;': '\u03a6',
+ 'phi;': '\u03c6',
+ 'phiv;': '\u03d5',
+ 'phmmat;': '\u2133',
+ 'phone;': '\u260e',
+ 'Pi;': '\u03a0',
+ 'pi;': '\u03c0',
+ 'pitchfork;': '\u22d4',
+ 'piv;': '\u03d6',
+ 'planck;': '\u210f',
+ 'planckh;': '\u210e',
+ 'plankv;': '\u210f',
+ 'plus;': '+',
+ 'plusacir;': '\u2a23',
+ 'plusb;': '\u229e',
+ 'pluscir;': '\u2a22',
+ 'plusdo;': '\u2214',
+ 'plusdu;': '\u2a25',
+ 'pluse;': '\u2a72',
+ 'PlusMinus;': '\xb1',
+ 'plusmn': '\xb1',
+ 'plusmn;': '\xb1',
+ 'plussim;': '\u2a26',
+ 'plustwo;': '\u2a27',
+ 'pm;': '\xb1',
+ 'Poincareplane;': '\u210c',
+ 'pointint;': '\u2a15',
+ 'Popf;': '\u2119',
+ 'popf;': '\U0001d561',
+ 'pound': '\xa3',
+ 'pound;': '\xa3',
+ 'Pr;': '\u2abb',
+ 'pr;': '\u227a',
+ 'prap;': '\u2ab7',
+ 'prcue;': '\u227c',
+ 'prE;': '\u2ab3',
+ 'pre;': '\u2aaf',
+ 'prec;': '\u227a',
+ 'precapprox;': '\u2ab7',
+ 'preccurlyeq;': '\u227c',
+ 'Precedes;': '\u227a',
+ 'PrecedesEqual;': '\u2aaf',
+ 'PrecedesSlantEqual;': '\u227c',
+ 'PrecedesTilde;': '\u227e',
+ 'preceq;': '\u2aaf',
+ 'precnapprox;': '\u2ab9',
+ 'precneqq;': '\u2ab5',
+ 'precnsim;': '\u22e8',
+ 'precsim;': '\u227e',
+ 'Prime;': '\u2033',
+ 'prime;': '\u2032',
+ 'primes;': '\u2119',
+ 'prnap;': '\u2ab9',
+ 'prnE;': '\u2ab5',
+ 'prnsim;': '\u22e8',
+ 'prod;': '\u220f',
+ 'Product;': '\u220f',
+ 'profalar;': '\u232e',
+ 'profline;': '\u2312',
+ 'profsurf;': '\u2313',
+ 'prop;': '\u221d',
+ 'Proportion;': '\u2237',
+ 'Proportional;': '\u221d',
+ 'propto;': '\u221d',
+ 'prsim;': '\u227e',
+ 'prurel;': '\u22b0',
+ 'Pscr;': '\U0001d4ab',
+ 'pscr;': '\U0001d4c5',
+ 'Psi;': '\u03a8',
+ 'psi;': '\u03c8',
+ 'puncsp;': '\u2008',
+ 'Qfr;': '\U0001d514',
+ 'qfr;': '\U0001d52e',
+ 'qint;': '\u2a0c',
+ 'Qopf;': '\u211a',
+ 'qopf;': '\U0001d562',
+ 'qprime;': '\u2057',
+ 'Qscr;': '\U0001d4ac',
+ 'qscr;': '\U0001d4c6',
+ 'quaternions;': '\u210d',
+ 'quatint;': '\u2a16',
+ 'quest;': '?',
+ 'questeq;': '\u225f',
+ 'QUOT': '"',
+ 'quot': '"',
+ 'QUOT;': '"',
+ 'quot;': '"',
+ 'rAarr;': '\u21db',
+ 'race;': '\u223d\u0331',
+ 'Racute;': '\u0154',
+ 'racute;': '\u0155',
+ 'radic;': '\u221a',
+ 'raemptyv;': '\u29b3',
+ 'Rang;': '\u27eb',
+ 'rang;': '\u27e9',
+ 'rangd;': '\u2992',
+ 'range;': '\u29a5',
+ 'rangle;': '\u27e9',
+ 'raquo': '\xbb',
+ 'raquo;': '\xbb',
+ 'Rarr;': '\u21a0',
+ 'rArr;': '\u21d2',
+ 'rarr;': '\u2192',
+ 'rarrap;': '\u2975',
+ 'rarrb;': '\u21e5',
+ 'rarrbfs;': '\u2920',
+ 'rarrc;': '\u2933',
+ 'rarrfs;': '\u291e',
+ 'rarrhk;': '\u21aa',
+ 'rarrlp;': '\u21ac',
+ 'rarrpl;': '\u2945',
+ 'rarrsim;': '\u2974',
+ 'Rarrtl;': '\u2916',
+ 'rarrtl;': '\u21a3',
+ 'rarrw;': '\u219d',
+ 'rAtail;': '\u291c',
+ 'ratail;': '\u291a',
+ 'ratio;': '\u2236',
+ 'rationals;': '\u211a',
+ 'RBarr;': '\u2910',
+ 'rBarr;': '\u290f',
+ 'rbarr;': '\u290d',
+ 'rbbrk;': '\u2773',
+ 'rbrace;': '}',
+ 'rbrack;': ']',
+ 'rbrke;': '\u298c',
+ 'rbrksld;': '\u298e',
+ 'rbrkslu;': '\u2990',
+ 'Rcaron;': '\u0158',
+ 'rcaron;': '\u0159',
+ 'Rcedil;': '\u0156',
+ 'rcedil;': '\u0157',
+ 'rceil;': '\u2309',
+ 'rcub;': '}',
+ 'Rcy;': '\u0420',
+ 'rcy;': '\u0440',
+ 'rdca;': '\u2937',
+ 'rdldhar;': '\u2969',
+ 'rdquo;': '\u201d',
+ 'rdquor;': '\u201d',
+ 'rdsh;': '\u21b3',
+ 'Re;': '\u211c',
+ 'real;': '\u211c',
+ 'realine;': '\u211b',
+ 'realpart;': '\u211c',
+ 'reals;': '\u211d',
+ 'rect;': '\u25ad',
+ 'REG': '\xae',
+ 'reg': '\xae',
+ 'REG;': '\xae',
+ 'reg;': '\xae',
+ 'ReverseElement;': '\u220b',
+ 'ReverseEquilibrium;': '\u21cb',
+ 'ReverseUpEquilibrium;': '\u296f',
+ 'rfisht;': '\u297d',
+ 'rfloor;': '\u230b',
+ 'Rfr;': '\u211c',
+ 'rfr;': '\U0001d52f',
+ 'rHar;': '\u2964',
+ 'rhard;': '\u21c1',
+ 'rharu;': '\u21c0',
+ 'rharul;': '\u296c',
+ 'Rho;': '\u03a1',
+ 'rho;': '\u03c1',
+ 'rhov;': '\u03f1',
+ 'RightAngleBracket;': '\u27e9',
+ 'RightArrow;': '\u2192',
+ 'Rightarrow;': '\u21d2',
+ 'rightarrow;': '\u2192',
+ 'RightArrowBar;': '\u21e5',
+ 'RightArrowLeftArrow;': '\u21c4',
+ 'rightarrowtail;': '\u21a3',
+ 'RightCeiling;': '\u2309',
+ 'RightDoubleBracket;': '\u27e7',
+ 'RightDownTeeVector;': '\u295d',
+ 'RightDownVector;': '\u21c2',
+ 'RightDownVectorBar;': '\u2955',
+ 'RightFloor;': '\u230b',
+ 'rightharpoondown;': '\u21c1',
+ 'rightharpoonup;': '\u21c0',
+ 'rightleftarrows;': '\u21c4',
+ 'rightleftharpoons;': '\u21cc',
+ 'rightrightarrows;': '\u21c9',
+ 'rightsquigarrow;': '\u219d',
+ 'RightTee;': '\u22a2',
+ 'RightTeeArrow;': '\u21a6',
+ 'RightTeeVector;': '\u295b',
+ 'rightthreetimes;': '\u22cc',
+ 'RightTriangle;': '\u22b3',
+ 'RightTriangleBar;': '\u29d0',
+ 'RightTriangleEqual;': '\u22b5',
+ 'RightUpDownVector;': '\u294f',
+ 'RightUpTeeVector;': '\u295c',
+ 'RightUpVector;': '\u21be',
+ 'RightUpVectorBar;': '\u2954',
+ 'RightVector;': '\u21c0',
+ 'RightVectorBar;': '\u2953',
+ 'ring;': '\u02da',
+ 'risingdotseq;': '\u2253',
+ 'rlarr;': '\u21c4',
+ 'rlhar;': '\u21cc',
+ 'rlm;': '\u200f',
+ 'rmoust;': '\u23b1',
+ 'rmoustache;': '\u23b1',
+ 'rnmid;': '\u2aee',
+ 'roang;': '\u27ed',
+ 'roarr;': '\u21fe',
+ 'robrk;': '\u27e7',
+ 'ropar;': '\u2986',
+ 'Ropf;': '\u211d',
+ 'ropf;': '\U0001d563',
+ 'roplus;': '\u2a2e',
+ 'rotimes;': '\u2a35',
+ 'RoundImplies;': '\u2970',
+ 'rpar;': ')',
+ 'rpargt;': '\u2994',
+ 'rppolint;': '\u2a12',
+ 'rrarr;': '\u21c9',
+ 'Rrightarrow;': '\u21db',
+ 'rsaquo;': '\u203a',
+ 'Rscr;': '\u211b',
+ 'rscr;': '\U0001d4c7',
+ 'Rsh;': '\u21b1',
+ 'rsh;': '\u21b1',
+ 'rsqb;': ']',
+ 'rsquo;': '\u2019',
+ 'rsquor;': '\u2019',
+ 'rthree;': '\u22cc',
+ 'rtimes;': '\u22ca',
+ 'rtri;': '\u25b9',
+ 'rtrie;': '\u22b5',
+ 'rtrif;': '\u25b8',
+ 'rtriltri;': '\u29ce',
+ 'RuleDelayed;': '\u29f4',
+ 'ruluhar;': '\u2968',
+ 'rx;': '\u211e',
+ 'Sacute;': '\u015a',
+ 'sacute;': '\u015b',
+ 'sbquo;': '\u201a',
+ 'Sc;': '\u2abc',
+ 'sc;': '\u227b',
+ 'scap;': '\u2ab8',
+ 'Scaron;': '\u0160',
+ 'scaron;': '\u0161',
+ 'sccue;': '\u227d',
+ 'scE;': '\u2ab4',
+ 'sce;': '\u2ab0',
+ 'Scedil;': '\u015e',
+ 'scedil;': '\u015f',
+ 'Scirc;': '\u015c',
+ 'scirc;': '\u015d',
+ 'scnap;': '\u2aba',
+ 'scnE;': '\u2ab6',
+ 'scnsim;': '\u22e9',
+ 'scpolint;': '\u2a13',
+ 'scsim;': '\u227f',
+ 'Scy;': '\u0421',
+ 'scy;': '\u0441',
+ 'sdot;': '\u22c5',
+ 'sdotb;': '\u22a1',
+ 'sdote;': '\u2a66',
+ 'searhk;': '\u2925',
+ 'seArr;': '\u21d8',
+ 'searr;': '\u2198',
+ 'searrow;': '\u2198',
+ 'sect': '\xa7',
+ 'sect;': '\xa7',
+ 'semi;': ';',
+ 'seswar;': '\u2929',
+ 'setminus;': '\u2216',
+ 'setmn;': '\u2216',
+ 'sext;': '\u2736',
+ 'Sfr;': '\U0001d516',
+ 'sfr;': '\U0001d530',
+ 'sfrown;': '\u2322',
+ 'sharp;': '\u266f',
+ 'SHCHcy;': '\u0429',
+ 'shchcy;': '\u0449',
+ 'SHcy;': '\u0428',
+ 'shcy;': '\u0448',
+ 'ShortDownArrow;': '\u2193',
+ 'ShortLeftArrow;': '\u2190',
+ 'shortmid;': '\u2223',
+ 'shortparallel;': '\u2225',
+ 'ShortRightArrow;': '\u2192',
+ 'ShortUpArrow;': '\u2191',
+ 'shy': '\xad',
+ 'shy;': '\xad',
+ 'Sigma;': '\u03a3',
+ 'sigma;': '\u03c3',
+ 'sigmaf;': '\u03c2',
+ 'sigmav;': '\u03c2',
+ 'sim;': '\u223c',
+ 'simdot;': '\u2a6a',
+ 'sime;': '\u2243',
+ 'simeq;': '\u2243',
+ 'simg;': '\u2a9e',
+ 'simgE;': '\u2aa0',
+ 'siml;': '\u2a9d',
+ 'simlE;': '\u2a9f',
+ 'simne;': '\u2246',
+ 'simplus;': '\u2a24',
+ 'simrarr;': '\u2972',
+ 'slarr;': '\u2190',
+ 'SmallCircle;': '\u2218',
+ 'smallsetminus;': '\u2216',
+ 'smashp;': '\u2a33',
+ 'smeparsl;': '\u29e4',
+ 'smid;': '\u2223',
+ 'smile;': '\u2323',
+ 'smt;': '\u2aaa',
+ 'smte;': '\u2aac',
+ 'smtes;': '\u2aac\ufe00',
+ 'SOFTcy;': '\u042c',
+ 'softcy;': '\u044c',
+ 'sol;': '/',
+ 'solb;': '\u29c4',
+ 'solbar;': '\u233f',
+ 'Sopf;': '\U0001d54a',
+ 'sopf;': '\U0001d564',
+ 'spades;': '\u2660',
+ 'spadesuit;': '\u2660',
+ 'spar;': '\u2225',
+ 'sqcap;': '\u2293',
+ 'sqcaps;': '\u2293\ufe00',
+ 'sqcup;': '\u2294',
+ 'sqcups;': '\u2294\ufe00',
+ 'Sqrt;': '\u221a',
+ 'sqsub;': '\u228f',
+ 'sqsube;': '\u2291',
+ 'sqsubset;': '\u228f',
+ 'sqsubseteq;': '\u2291',
+ 'sqsup;': '\u2290',
+ 'sqsupe;': '\u2292',
+ 'sqsupset;': '\u2290',
+ 'sqsupseteq;': '\u2292',
+ 'squ;': '\u25a1',
+ 'Square;': '\u25a1',
+ 'square;': '\u25a1',
+ 'SquareIntersection;': '\u2293',
+ 'SquareSubset;': '\u228f',
+ 'SquareSubsetEqual;': '\u2291',
+ 'SquareSuperset;': '\u2290',
+ 'SquareSupersetEqual;': '\u2292',
+ 'SquareUnion;': '\u2294',
+ 'squarf;': '\u25aa',
+ 'squf;': '\u25aa',
+ 'srarr;': '\u2192',
+ 'Sscr;': '\U0001d4ae',
+ 'sscr;': '\U0001d4c8',
+ 'ssetmn;': '\u2216',
+ 'ssmile;': '\u2323',
+ 'sstarf;': '\u22c6',
+ 'Star;': '\u22c6',
+ 'star;': '\u2606',
+ 'starf;': '\u2605',
+ 'straightepsilon;': '\u03f5',
+ 'straightphi;': '\u03d5',
+ 'strns;': '\xaf',
+ 'Sub;': '\u22d0',
+ 'sub;': '\u2282',
+ 'subdot;': '\u2abd',
+ 'subE;': '\u2ac5',
+ 'sube;': '\u2286',
+ 'subedot;': '\u2ac3',
+ 'submult;': '\u2ac1',
+ 'subnE;': '\u2acb',
+ 'subne;': '\u228a',
+ 'subplus;': '\u2abf',
+ 'subrarr;': '\u2979',
+ 'Subset;': '\u22d0',
+ 'subset;': '\u2282',
+ 'subseteq;': '\u2286',
+ 'subseteqq;': '\u2ac5',
+ 'SubsetEqual;': '\u2286',
+ 'subsetneq;': '\u228a',
+ 'subsetneqq;': '\u2acb',
+ 'subsim;': '\u2ac7',
+ 'subsub;': '\u2ad5',
+ 'subsup;': '\u2ad3',
+ 'succ;': '\u227b',
+ 'succapprox;': '\u2ab8',
+ 'succcurlyeq;': '\u227d',
+ 'Succeeds;': '\u227b',
+ 'SucceedsEqual;': '\u2ab0',
+ 'SucceedsSlantEqual;': '\u227d',
+ 'SucceedsTilde;': '\u227f',
+ 'succeq;': '\u2ab0',
+ 'succnapprox;': '\u2aba',
+ 'succneqq;': '\u2ab6',
+ 'succnsim;': '\u22e9',
+ 'succsim;': '\u227f',
+ 'SuchThat;': '\u220b',
+ 'Sum;': '\u2211',
+ 'sum;': '\u2211',
+ 'sung;': '\u266a',
+ 'sup1': '\xb9',
+ 'sup1;': '\xb9',
+ 'sup2': '\xb2',
+ 'sup2;': '\xb2',
+ 'sup3': '\xb3',
+ 'sup3;': '\xb3',
+ 'Sup;': '\u22d1',
+ 'sup;': '\u2283',
+ 'supdot;': '\u2abe',
+ 'supdsub;': '\u2ad8',
+ 'supE;': '\u2ac6',
+ 'supe;': '\u2287',
+ 'supedot;': '\u2ac4',
+ 'Superset;': '\u2283',
+ 'SupersetEqual;': '\u2287',
+ 'suphsol;': '\u27c9',
+ 'suphsub;': '\u2ad7',
+ 'suplarr;': '\u297b',
+ 'supmult;': '\u2ac2',
+ 'supnE;': '\u2acc',
+ 'supne;': '\u228b',
+ 'supplus;': '\u2ac0',
+ 'Supset;': '\u22d1',
+ 'supset;': '\u2283',
+ 'supseteq;': '\u2287',
+ 'supseteqq;': '\u2ac6',
+ 'supsetneq;': '\u228b',
+ 'supsetneqq;': '\u2acc',
+ 'supsim;': '\u2ac8',
+ 'supsub;': '\u2ad4',
+ 'supsup;': '\u2ad6',
+ 'swarhk;': '\u2926',
+ 'swArr;': '\u21d9',
+ 'swarr;': '\u2199',
+ 'swarrow;': '\u2199',
+ 'swnwar;': '\u292a',
+ 'szlig': '\xdf',
+ 'szlig;': '\xdf',
+ 'Tab;': '\t',
+ 'target;': '\u2316',
+ 'Tau;': '\u03a4',
+ 'tau;': '\u03c4',
+ 'tbrk;': '\u23b4',
+ 'Tcaron;': '\u0164',
+ 'tcaron;': '\u0165',
+ 'Tcedil;': '\u0162',
+ 'tcedil;': '\u0163',
+ 'Tcy;': '\u0422',
+ 'tcy;': '\u0442',
+ 'tdot;': '\u20db',
+ 'telrec;': '\u2315',
+ 'Tfr;': '\U0001d517',
+ 'tfr;': '\U0001d531',
+ 'there4;': '\u2234',
+ 'Therefore;': '\u2234',
+ 'therefore;': '\u2234',
+ 'Theta;': '\u0398',
+ 'theta;': '\u03b8',
+ 'thetasym;': '\u03d1',
+ 'thetav;': '\u03d1',
+ 'thickapprox;': '\u2248',
+ 'thicksim;': '\u223c',
+ 'ThickSpace;': '\u205f\u200a',
+ 'thinsp;': '\u2009',
+ 'ThinSpace;': '\u2009',
+ 'thkap;': '\u2248',
+ 'thksim;': '\u223c',
+ 'THORN': '\xde',
+ 'thorn': '\xfe',
+ 'THORN;': '\xde',
+ 'thorn;': '\xfe',
+ 'Tilde;': '\u223c',
+ 'tilde;': '\u02dc',
+ 'TildeEqual;': '\u2243',
+ 'TildeFullEqual;': '\u2245',
+ 'TildeTilde;': '\u2248',
+ 'times': '\xd7',
+ 'times;': '\xd7',
+ 'timesb;': '\u22a0',
+ 'timesbar;': '\u2a31',
+ 'timesd;': '\u2a30',
+ 'tint;': '\u222d',
+ 'toea;': '\u2928',
+ 'top;': '\u22a4',
+ 'topbot;': '\u2336',
+ 'topcir;': '\u2af1',
+ 'Topf;': '\U0001d54b',
+ 'topf;': '\U0001d565',
+ 'topfork;': '\u2ada',
+ 'tosa;': '\u2929',
+ 'tprime;': '\u2034',
+ 'TRADE;': '\u2122',
+ 'trade;': '\u2122',
+ 'triangle;': '\u25b5',
+ 'triangledown;': '\u25bf',
+ 'triangleleft;': '\u25c3',
+ 'trianglelefteq;': '\u22b4',
+ 'triangleq;': '\u225c',
+ 'triangleright;': '\u25b9',
+ 'trianglerighteq;': '\u22b5',
+ 'tridot;': '\u25ec',
+ 'trie;': '\u225c',
+ 'triminus;': '\u2a3a',
+ 'TripleDot;': '\u20db',
+ 'triplus;': '\u2a39',
+ 'trisb;': '\u29cd',
+ 'tritime;': '\u2a3b',
+ 'trpezium;': '\u23e2',
+ 'Tscr;': '\U0001d4af',
+ 'tscr;': '\U0001d4c9',
+ 'TScy;': '\u0426',
+ 'tscy;': '\u0446',
+ 'TSHcy;': '\u040b',
+ 'tshcy;': '\u045b',
+ 'Tstrok;': '\u0166',
+ 'tstrok;': '\u0167',
+ 'twixt;': '\u226c',
+ 'twoheadleftarrow;': '\u219e',
+ 'twoheadrightarrow;': '\u21a0',
+ 'Uacute': '\xda',
+ 'uacute': '\xfa',
+ 'Uacute;': '\xda',
+ 'uacute;': '\xfa',
+ 'Uarr;': '\u219f',
+ 'uArr;': '\u21d1',
+ 'uarr;': '\u2191',
+ 'Uarrocir;': '\u2949',
+ 'Ubrcy;': '\u040e',
+ 'ubrcy;': '\u045e',
+ 'Ubreve;': '\u016c',
+ 'ubreve;': '\u016d',
+ 'Ucirc': '\xdb',
+ 'ucirc': '\xfb',
+ 'Ucirc;': '\xdb',
+ 'ucirc;': '\xfb',
+ 'Ucy;': '\u0423',
+ 'ucy;': '\u0443',
+ 'udarr;': '\u21c5',
+ 'Udblac;': '\u0170',
+ 'udblac;': '\u0171',
+ 'udhar;': '\u296e',
+ 'ufisht;': '\u297e',
+ 'Ufr;': '\U0001d518',
+ 'ufr;': '\U0001d532',
+ 'Ugrave': '\xd9',
+ 'ugrave': '\xf9',
+ 'Ugrave;': '\xd9',
+ 'ugrave;': '\xf9',
+ 'uHar;': '\u2963',
+ 'uharl;': '\u21bf',
+ 'uharr;': '\u21be',
+ 'uhblk;': '\u2580',
+ 'ulcorn;': '\u231c',
+ 'ulcorner;': '\u231c',
+ 'ulcrop;': '\u230f',
+ 'ultri;': '\u25f8',
+ 'Umacr;': '\u016a',
+ 'umacr;': '\u016b',
+ 'uml': '\xa8',
+ 'uml;': '\xa8',
+ 'UnderBar;': '_',
+ 'UnderBrace;': '\u23df',
+ 'UnderBracket;': '\u23b5',
+ 'UnderParenthesis;': '\u23dd',
+ 'Union;': '\u22c3',
+ 'UnionPlus;': '\u228e',
+ 'Uogon;': '\u0172',
+ 'uogon;': '\u0173',
+ 'Uopf;': '\U0001d54c',
+ 'uopf;': '\U0001d566',
+ 'UpArrow;': '\u2191',
+ 'Uparrow;': '\u21d1',
+ 'uparrow;': '\u2191',
+ 'UpArrowBar;': '\u2912',
+ 'UpArrowDownArrow;': '\u21c5',
+ 'UpDownArrow;': '\u2195',
+ 'Updownarrow;': '\u21d5',
+ 'updownarrow;': '\u2195',
+ 'UpEquilibrium;': '\u296e',
+ 'upharpoonleft;': '\u21bf',
+ 'upharpoonright;': '\u21be',
+ 'uplus;': '\u228e',
+ 'UpperLeftArrow;': '\u2196',
+ 'UpperRightArrow;': '\u2197',
+ 'Upsi;': '\u03d2',
+ 'upsi;': '\u03c5',
+ 'upsih;': '\u03d2',
+ 'Upsilon;': '\u03a5',
+ 'upsilon;': '\u03c5',
+ 'UpTee;': '\u22a5',
+ 'UpTeeArrow;': '\u21a5',
+ 'upuparrows;': '\u21c8',
+ 'urcorn;': '\u231d',
+ 'urcorner;': '\u231d',
+ 'urcrop;': '\u230e',
+ 'Uring;': '\u016e',
+ 'uring;': '\u016f',
+ 'urtri;': '\u25f9',
+ 'Uscr;': '\U0001d4b0',
+ 'uscr;': '\U0001d4ca',
+ 'utdot;': '\u22f0',
+ 'Utilde;': '\u0168',
+ 'utilde;': '\u0169',
+ 'utri;': '\u25b5',
+ 'utrif;': '\u25b4',
+ 'uuarr;': '\u21c8',
+ 'Uuml': '\xdc',
+ 'uuml': '\xfc',
+ 'Uuml;': '\xdc',
+ 'uuml;': '\xfc',
+ 'uwangle;': '\u29a7',
+ 'vangrt;': '\u299c',
+ 'varepsilon;': '\u03f5',
+ 'varkappa;': '\u03f0',
+ 'varnothing;': '\u2205',
+ 'varphi;': '\u03d5',
+ 'varpi;': '\u03d6',
+ 'varpropto;': '\u221d',
+ 'vArr;': '\u21d5',
+ 'varr;': '\u2195',
+ 'varrho;': '\u03f1',
+ 'varsigma;': '\u03c2',
+ 'varsubsetneq;': '\u228a\ufe00',
+ 'varsubsetneqq;': '\u2acb\ufe00',
+ 'varsupsetneq;': '\u228b\ufe00',
+ 'varsupsetneqq;': '\u2acc\ufe00',
+ 'vartheta;': '\u03d1',
+ 'vartriangleleft;': '\u22b2',
+ 'vartriangleright;': '\u22b3',
+ 'Vbar;': '\u2aeb',
+ 'vBar;': '\u2ae8',
+ 'vBarv;': '\u2ae9',
+ 'Vcy;': '\u0412',
+ 'vcy;': '\u0432',
+ 'VDash;': '\u22ab',
+ 'Vdash;': '\u22a9',
+ 'vDash;': '\u22a8',
+ 'vdash;': '\u22a2',
+ 'Vdashl;': '\u2ae6',
+ 'Vee;': '\u22c1',
+ 'vee;': '\u2228',
+ 'veebar;': '\u22bb',
+ 'veeeq;': '\u225a',
+ 'vellip;': '\u22ee',
+ 'Verbar;': '\u2016',
+ 'verbar;': '|',
+ 'Vert;': '\u2016',
+ 'vert;': '|',
+ 'VerticalBar;': '\u2223',
+ 'VerticalLine;': '|',
+ 'VerticalSeparator;': '\u2758',
+ 'VerticalTilde;': '\u2240',
+ 'VeryThinSpace;': '\u200a',
+ 'Vfr;': '\U0001d519',
+ 'vfr;': '\U0001d533',
+ 'vltri;': '\u22b2',
+ 'vnsub;': '\u2282\u20d2',
+ 'vnsup;': '\u2283\u20d2',
+ 'Vopf;': '\U0001d54d',
+ 'vopf;': '\U0001d567',
+ 'vprop;': '\u221d',
+ 'vrtri;': '\u22b3',
+ 'Vscr;': '\U0001d4b1',
+ 'vscr;': '\U0001d4cb',
+ 'vsubnE;': '\u2acb\ufe00',
+ 'vsubne;': '\u228a\ufe00',
+ 'vsupnE;': '\u2acc\ufe00',
+ 'vsupne;': '\u228b\ufe00',
+ 'Vvdash;': '\u22aa',
+ 'vzigzag;': '\u299a',
+ 'Wcirc;': '\u0174',
+ 'wcirc;': '\u0175',
+ 'wedbar;': '\u2a5f',
+ 'Wedge;': '\u22c0',
+ 'wedge;': '\u2227',
+ 'wedgeq;': '\u2259',
+ 'weierp;': '\u2118',
+ 'Wfr;': '\U0001d51a',
+ 'wfr;': '\U0001d534',
+ 'Wopf;': '\U0001d54e',
+ 'wopf;': '\U0001d568',
+ 'wp;': '\u2118',
+ 'wr;': '\u2240',
+ 'wreath;': '\u2240',
+ 'Wscr;': '\U0001d4b2',
+ 'wscr;': '\U0001d4cc',
+ 'xcap;': '\u22c2',
+ 'xcirc;': '\u25ef',
+ 'xcup;': '\u22c3',
+ 'xdtri;': '\u25bd',
+ 'Xfr;': '\U0001d51b',
+ 'xfr;': '\U0001d535',
+ 'xhArr;': '\u27fa',
+ 'xharr;': '\u27f7',
+ 'Xi;': '\u039e',
+ 'xi;': '\u03be',
+ 'xlArr;': '\u27f8',
+ 'xlarr;': '\u27f5',
+ 'xmap;': '\u27fc',
+ 'xnis;': '\u22fb',
+ 'xodot;': '\u2a00',
+ 'Xopf;': '\U0001d54f',
+ 'xopf;': '\U0001d569',
+ 'xoplus;': '\u2a01',
+ 'xotime;': '\u2a02',
+ 'xrArr;': '\u27f9',
+ 'xrarr;': '\u27f6',
+ 'Xscr;': '\U0001d4b3',
+ 'xscr;': '\U0001d4cd',
+ 'xsqcup;': '\u2a06',
+ 'xuplus;': '\u2a04',
+ 'xutri;': '\u25b3',
+ 'xvee;': '\u22c1',
+ 'xwedge;': '\u22c0',
+ 'Yacute': '\xdd',
+ 'yacute': '\xfd',
+ 'Yacute;': '\xdd',
+ 'yacute;': '\xfd',
+ 'YAcy;': '\u042f',
+ 'yacy;': '\u044f',
+ 'Ycirc;': '\u0176',
+ 'ycirc;': '\u0177',
+ 'Ycy;': '\u042b',
+ 'ycy;': '\u044b',
+ 'yen': '\xa5',
+ 'yen;': '\xa5',
+ 'Yfr;': '\U0001d51c',
+ 'yfr;': '\U0001d536',
+ 'YIcy;': '\u0407',
+ 'yicy;': '\u0457',
+ 'Yopf;': '\U0001d550',
+ 'yopf;': '\U0001d56a',
+ 'Yscr;': '\U0001d4b4',
+ 'yscr;': '\U0001d4ce',
+ 'YUcy;': '\u042e',
+ 'yucy;': '\u044e',
+ 'yuml': '\xff',
+ 'Yuml;': '\u0178',
+ 'yuml;': '\xff',
+ 'Zacute;': '\u0179',
+ 'zacute;': '\u017a',
+ 'Zcaron;': '\u017d',
+ 'zcaron;': '\u017e',
+ 'Zcy;': '\u0417',
+ 'zcy;': '\u0437',
+ 'Zdot;': '\u017b',
+ 'zdot;': '\u017c',
+ 'zeetrf;': '\u2128',
+ 'ZeroWidthSpace;': '\u200b',
+ 'Zeta;': '\u0396',
+ 'zeta;': '\u03b6',
+ 'Zfr;': '\u2128',
+ 'zfr;': '\U0001d537',
+ 'ZHcy;': '\u0416',
+ 'zhcy;': '\u0436',
+ 'zigrarr;': '\u21dd',
+ 'Zopf;': '\u2124',
+ 'zopf;': '\U0001d56b',
+ 'Zscr;': '\U0001d4b5',
+ 'zscr;': '\U0001d4cf',
+ 'zwj;': '\u200d',
+ 'zwnj;': '\u200c',
+ }
+
+try:
+ import http.client as compat_http_client
+except ImportError: # Python 2
+ import httplib as compat_http_client
+
+try:
+ from urllib.error import HTTPError as compat_HTTPError
+except ImportError: # Python 2
+ from urllib2 import HTTPError as compat_HTTPError
+
+try:
+ from urllib.request import urlretrieve as compat_urlretrieve
+except ImportError: # Python 2
+ from urllib import urlretrieve as compat_urlretrieve
+
+try:
+ from html.parser import HTMLParser as compat_HTMLParser
+except ImportError: # Python 2
+ from HTMLParser import HTMLParser as compat_HTMLParser
+
+try:  # Python 2
+    from HTMLParser import HTMLParseError as compat_HTMLParseError
+except ImportError:  # Python 3
+    try:
+        from html.parser import HTMLParseError as compat_HTMLParseError
+    except ImportError:  # Python >= 3.5
+
+        # HTMLParseError was deprecated in Python 3.3 and removed in
+        # Python 3.5. Introduce a dummy exception for Python >= 3.5 so that
+        # exception handling stays compatible and uniform across versions.
+        class compat_HTMLParseError(Exception):
+            pass
+
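+# Illustrative usage (not part of the original module): the alias allows
+# uniform handling of parse failures across Python versions, e.g.:
+#
+#     try:
+#         compat_HTMLParser().feed(possibly_malformed_html)
+#     except compat_HTMLParseError:
+#         pass  # never raised on Python >= 3.5, where the parser recovers
+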
+try:
+ from subprocess import DEVNULL
+ compat_subprocess_get_DEVNULL = lambda: DEVNULL
+except ImportError:
+ compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
+
+try:
+ import http.server as compat_http_server
+except ImportError:
+ import BaseHTTPServer as compat_http_server
+
+try:
+ compat_str = unicode # Python 2
+except NameError:
+ compat_str = str
+
+try:
+ from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
+ from urllib.parse import unquote as compat_urllib_parse_unquote
+ from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
+except ImportError: # Python 2
+ _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
+ else re.compile(r'([\x00-\x7f]+)'))
+
+    # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
+    # implementations from cpython 3.4.3's stdlib. Python 2's versions
+    # are apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244)
+
+ def compat_urllib_parse_unquote_to_bytes(string):
+ """unquote_to_bytes('abc%20def') -> b'abc def'."""
+ # Note: strings are encoded as UTF-8. This is only an issue if it contains
+ # unescaped non-ASCII characters, which URIs should not.
+ if not string:
+ # Is it a string-like object?
+ string.split
+ return b''
+ if isinstance(string, compat_str):
+ string = string.encode('utf-8')
+ bits = string.split(b'%')
+ if len(bits) == 1:
+ return string
+ res = [bits[0]]
+ append = res.append
+ for item in bits[1:]:
+ try:
+ append(compat_urllib_parse._hextochr[item[:2]])
+ append(item[2:])
+ except KeyError:
+ append(b'%')
+ append(item)
+ return b''.join(res)
+
+ def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
+ """Replace %xx escapes by their single-character equivalent. The optional
+ encoding and errors parameters specify how to decode percent-encoded
+ sequences into Unicode characters, as accepted by the bytes.decode()
+ method.
+ By default, percent-encoded sequences are decoded with UTF-8, and invalid
+ sequences are replaced by a placeholder character.
+
+ unquote('abc%20def') -> 'abc def'.
+ """
+ if '%' not in string:
+ string.split
+ return string
+ if encoding is None:
+ encoding = 'utf-8'
+ if errors is None:
+ errors = 'replace'
+ bits = _asciire.split(string)
+ res = [bits[0]]
+ append = res.append
+ for i in range(1, len(bits), 2):
+ append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
+ append(bits[i + 1])
+ return ''.join(res)
+
+ def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
+ """Like unquote(), but also replace plus signs by spaces, as required for
+ unquoting HTML form values.
+
+ unquote_plus('%7e/abc+def') -> '~/abc def'
+ """
+ string = string.replace('+', ' ')
+ return compat_urllib_parse_unquote(string, encoding, errors)
+
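+# Illustrative behaviour (not part of the original module), identical on
+# Python 2 and 3:
+#
+#     compat_urllib_parse_unquote_to_bytes('abc%20def')  # b'abc def'
+#     compat_urllib_parse_unquote('abc%C3%A9')           # u'abc\xe9'
+#     compat_urllib_parse_unquote_plus('%7e/abc+def')    # u'~/abc def'
+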
+try:
+ from urllib.parse import urlencode as compat_urllib_parse_urlencode
+except ImportError: # Python 2
+    # Python 2's urlencode chokes on a mixture of byte and unicode strings.
+    # Possible solutions are to either port the Python 3 implementation along
+    # with all its helpers or to ensure the input query contains only byte
+    # strings. We stick with the latter and recursively encode the whole query.
+ def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
+ def encode_elem(e):
+ if isinstance(e, dict):
+ e = encode_dict(e)
+ elif isinstance(e, (list, tuple,)):
+ list_e = encode_list(e)
+ e = tuple(list_e) if isinstance(e, tuple) else list_e
+ elif isinstance(e, compat_str):
+ e = e.encode(encoding)
+ return e
+
+ def encode_dict(d):
+ return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
+
+ def encode_list(l):
+ return [encode_elem(e) for e in l]
+
+ return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
+
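+# Illustrative (not part of the original module): with the shim, queries
+# mixing byte and unicode strings encode safely on Python 2 as well, e.g.:
+#
+#     compat_urllib_parse_urlencode({'q': u'caf\xe9'})  # 'q=caf%C3%A9'
+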
+try:
+ from urllib.request import DataHandler as compat_urllib_request_DataHandler
+except ImportError: # Python < 3.4
+ # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
+ class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
+ def data_open(self, req):
+ # data URLs as specified in RFC 2397.
+ #
+ # ignores POSTed data
+ #
+ # syntax:
+ # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
+ # mediatype := [ type "/" subtype ] *( ";" parameter )
+ # data := *urlchar
+ # parameter := attribute "=" value
+ url = req.get_full_url()
+
+ scheme, data = url.split(':', 1)
+ mediatype, data = data.split(',', 1)
+
+            # even base64-encoded data URLs might be quoted, so unquote in any case:
+ data = compat_urllib_parse_unquote_to_bytes(data)
+ if mediatype.endswith(';base64'):
+ data = binascii.a2b_base64(data)
+ mediatype = mediatype[:-7]
+
+ if not mediatype:
+ mediatype = 'text/plain;charset=US-ASCII'
+
+ headers = email.message_from_string(
+ 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))
+
+ return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
+
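+# Illustrative (not part of the original module): once this handler is
+# installed in an opener, data URLs resolve like any other URL, e.g.
+# opening 'data:text/plain;base64,SGVsbG8=' yields the payload b'Hello'.
+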
+try:
+ compat_basestring = basestring # Python 2
+except NameError:
+ compat_basestring = str
+
+try:
+ compat_chr = unichr # Python 2
+except NameError:
+ compat_chr = chr
+
+try:
+ from xml.etree.ElementTree import ParseError as compat_xml_parse_error
+except ImportError: # Python 2.6
+ from xml.parsers.expat import ExpatError as compat_xml_parse_error
+
+
+etree = xml.etree.ElementTree
+
+
+class _TreeBuilder(etree.TreeBuilder):
+ def doctype(self, name, pubid, system):
+ pass
+
+
+try:
+ # xml.etree.ElementTree.Element is a method in Python <=2.6 and
+ # the following will crash with:
+ # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
+ isinstance(None, xml.etree.ElementTree.Element)
+ from xml.etree.ElementTree import Element as compat_etree_Element
+except TypeError: # Python <=2.6
+ from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
+
+if sys.version_info[0] >= 3:
+ def compat_etree_fromstring(text):
+ return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
+else:
+ # python 2.x tries to encode unicode strings with ascii (see the
+ # XMLParser._fixtext method)
+ try:
+ _etree_iter = etree.Element.iter
+ except AttributeError: # Python <=2.6
+ def _etree_iter(root):
+ for el in root.findall('*'):
+ yield el
+ for sub in _etree_iter(el):
+ yield sub
+
+    # On Python 2.6, etree.XML() does not accept a parser argument; the
+    # function below is copied from the CPython 2.7 source.
+ def _XML(text, parser=None):
+ if not parser:
+ parser = etree.XMLParser(target=_TreeBuilder())
+ parser.feed(text)
+ return parser.close()
+
+ def _element_factory(*args, **kwargs):
+ el = etree.Element(*args, **kwargs)
+ for k, v in el.items():
+ if isinstance(v, bytes):
+ el.set(k, v.decode('utf-8'))
+ return el
+
+ def compat_etree_fromstring(text):
+ doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
+ for el in _etree_iter(doc):
+ if el.text is not None and isinstance(el.text, bytes):
+ el.text = el.text.decode('utf-8')
+ return doc
+
+if hasattr(etree, 'register_namespace'):
+ compat_etree_register_namespace = etree.register_namespace
+else:
+ def compat_etree_register_namespace(prefix, uri):
+ """Register a namespace prefix.
+ The registry is global, and any existing mapping for either the
+ given prefix or the namespace URI will be removed.
+ *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
+ attributes in this namespace will be serialized with prefix if possible.
+ ValueError is raised if prefix is reserved or is invalid.
+ """
+ if re.match(r"ns\d+$", prefix):
+ raise ValueError("Prefix format reserved for internal use")
+ for k, v in list(etree._namespace_map.items()):
+ if k == uri or v == prefix:
+ del etree._namespace_map[k]
+ etree._namespace_map[uri] = prefix
+
+if sys.version_info < (2, 7):
+    # Here comes the crazy part: in Python 2.6, if the xpath is a unicode
+    # string, './/node' does not match nodes that are direct children of '.'!
+ def compat_xpath(xpath):
+ if isinstance(xpath, compat_str):
+ xpath = xpath.encode('ascii')
+ return xpath
+else:
+ compat_xpath = lambda xpath: xpath
+
+try:
+ from urllib.parse import parse_qs as compat_parse_qs
+except ImportError: # Python 2
+ # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
+ # Python 2's version is apparently totally broken
+
+ def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
+ encoding='utf-8', errors='replace'):
+ qs, _coerce_result = qs, compat_str
+ pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ r = []
+ for name_value in pairs:
+ if not name_value and not strict_parsing:
+ continue
+ nv = name_value.split('=', 1)
+ if len(nv) != 2:
+ if strict_parsing:
+ raise ValueError('bad query field: %r' % (name_value,))
+ # Handle case of a control-name with no equal sign
+ if keep_blank_values:
+ nv.append('')
+ else:
+ continue
+ if len(nv[1]) or keep_blank_values:
+ name = nv[0].replace('+', ' ')
+ name = compat_urllib_parse_unquote(
+ name, encoding=encoding, errors=errors)
+ name = _coerce_result(name)
+ value = nv[1].replace('+', ' ')
+ value = compat_urllib_parse_unquote(
+ value, encoding=encoding, errors=errors)
+ value = _coerce_result(value)
+ r.append((name, value))
+ return r
+
+ def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+ encoding='utf-8', errors='replace'):
+ parsed_result = {}
+ pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
+ encoding=encoding, errors=errors)
+ for name, value in pairs:
+ if name in parsed_result:
+ parsed_result[name].append(value)
+ else:
+ parsed_result[name] = [value]
+ return parsed_result
+
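+# Illustrative (not part of the original module):
+#
+#     compat_parse_qs('a=1&a=2&b=')                      # {'a': ['1', '2']}
+#     compat_parse_qs('a=1&b=', keep_blank_values=True)  # {'a': ['1'], 'b': ['']}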
+
+compat_os_name = os._name if os.name == 'java' else os.name
+
+
+if compat_os_name == 'nt':
+ def compat_shlex_quote(s):
+ return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
+else:
+ try:
+ from shlex import quote as compat_shlex_quote
+ except ImportError: # Python < 3.3
+ def compat_shlex_quote(s):
+ if re.match(r'^[-_\w./]+$', s):
+ return s
+ else:
+ return "'" + s.replace("'", "'\"'\"'") + "'"
+
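+# Illustrative (not part of the original module): on POSIX,
+# compat_shlex_quote("it's") returns the shell-safe 'it'"'"'s', while simple
+# tokens such as 'abc-1.2' pass through unchanged.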
+
+try:
+ args = shlex.split('中文')
+ assert (isinstance(args, list)
+ and isinstance(args[0], compat_str)
+ and args[0] == '中文')
+ compat_shlex_split = shlex.split
+except (AssertionError, UnicodeEncodeError):
+ # Working around shlex issue with unicode strings on some python 2
+ # versions (see http://bugs.python.org/issue1548891)
+ def compat_shlex_split(s, comments=False, posix=True):
+ if isinstance(s, compat_str):
+ s = s.encode('utf-8')
+ return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
+
+
+def compat_ord(c):
+ if type(c) is int:
+ return c
+ else:
+ return ord(c)
+
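+# Illustrative (not part of the original module): compat_ord papers over
+# bytes indexing differences: b'abc'[0] is 'a' on Python 2 but 97 on
+# Python 3, whereas compat_ord(b'abc'[0]) is 97 on both.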
+
+if sys.version_info >= (3, 0):
+ compat_getenv = os.getenv
+ compat_expanduser = os.path.expanduser
+
+ def compat_setenv(key, value, env=os.environ):
+ env[key] = value
+else:
+    # Environment variables should be decoded with the filesystem encoding.
+    # Otherwise handling them fails if any non-ASCII characters are present
+    # (see #3854, #3217, #2918)
+
+ def compat_getenv(key, default=None):
+ from .utils import get_filesystem_encoding
+ env = os.getenv(key, default)
+ if env:
+ env = env.decode(get_filesystem_encoding())
+ return env
+
+ def compat_setenv(key, value, env=os.environ):
+ def encode(v):
+ from .utils import get_filesystem_encoding
+ return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
+ env[encode(key)] = encode(value)
+
+ # HACK: The default implementations of os.path.expanduser from cpython do not decode
+ # environment variables with filesystem encoding. We will work around this by
+ # providing adjusted implementations.
+ # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
+ # for different platforms with correct environment variables decoding.
+
+ if compat_os_name == 'posix':
+ def compat_expanduser(path):
+ """Expand ~ and ~user constructions. If user or $HOME is unknown,
+ do nothing."""
+ if not path.startswith('~'):
+ return path
+ i = path.find('/', 1)
+ if i < 0:
+ i = len(path)
+ if i == 1:
+ if 'HOME' not in os.environ:
+ import pwd
+ userhome = pwd.getpwuid(os.getuid()).pw_dir
+ else:
+ userhome = compat_getenv('HOME')
+ else:
+ import pwd
+ try:
+ pwent = pwd.getpwnam(path[1:i])
+ except KeyError:
+ return path
+ userhome = pwent.pw_dir
+ userhome = userhome.rstrip('/')
+ return (userhome + path[i:]) or '/'
+ elif compat_os_name in ('nt', 'ce'):
+ def compat_expanduser(path):
+ """Expand ~ and ~user constructs.
+
+ If user or $HOME is unknown, do nothing."""
+ if path[:1] != '~':
+ return path
+ i, n = 1, len(path)
+ while i < n and path[i] not in '/\\':
+ i = i + 1
+
+ if 'HOME' in os.environ:
+ userhome = compat_getenv('HOME')
+ elif 'USERPROFILE' in os.environ:
+ userhome = compat_getenv('USERPROFILE')
+ elif 'HOMEPATH' not in os.environ:
+ return path
+ else:
+ try:
+ drive = compat_getenv('HOMEDRIVE')
+ except KeyError:
+ drive = ''
+ userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
+
+ if i != 1: # ~user
+ userhome = os.path.join(os.path.dirname(userhome), path[1:i])
+
+ return userhome + path[i:]
+ else:
+ compat_expanduser = os.path.expanduser
+
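+# Illustrative (not part of the original module), assuming HOME=/home/user:
+#
+#     compat_expanduser(u'~/.config')  # u'/home/user/.config'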
+
+if compat_os_name == 'nt' and sys.version_info < (3, 8):
+ # os.path.realpath on Windows does not follow symbolic links
+ # prior to Python 3.8 (see https://bugs.python.org/issue9949)
+ def compat_realpath(path):
+ while os.path.islink(path):
+ path = os.path.abspath(os.readlink(path))
+ return path
+else:
+ compat_realpath = os.path.realpath
+
+
+if sys.version_info < (3, 0):
+ def compat_print(s):
+ from .utils import preferredencoding
+ print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
+else:
+ def compat_print(s):
+ assert isinstance(s, compat_str)
+ print(s)
+
+
+if sys.version_info < (3, 0) and sys.platform == 'win32':
+ def compat_getpass(prompt, *args, **kwargs):
+ if isinstance(prompt, compat_str):
+ from .utils import preferredencoding
+ prompt = prompt.encode(preferredencoding())
+ return getpass.getpass(prompt, *args, **kwargs)
+else:
+ compat_getpass = getpass.getpass
+
+try:
+ compat_input = raw_input
+except NameError: # Python 3
+ compat_input = input
+
+# Python < 2.6.5 requires kwargs to be bytes
+try:
+ def _testfunc(x):
+ pass
+ _testfunc(**{'x': 0})
+except TypeError:
+ def compat_kwargs(kwargs):
+ return dict((bytes(k), v) for k, v in kwargs.items())
+else:
+ compat_kwargs = lambda kwargs: kwargs
+
+
+try:
+ compat_numeric_types = (int, float, long, complex)
+except NameError: # Python 3
+ compat_numeric_types = (int, float, complex)
+
+
+try:
+ compat_integer_types = (int, long)
+except NameError: # Python 3
+ compat_integer_types = (int, )
+
+
+if sys.version_info < (2, 7):
+ def compat_socket_create_connection(address, timeout, source_address=None):
+ host, port = address
+ err = None
+ for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
+ af, socktype, proto, canonname, sa = res
+ sock = None
+ try:
+ sock = socket.socket(af, socktype, proto)
+ sock.settimeout(timeout)
+ if source_address:
+ sock.bind(source_address)
+ sock.connect(sa)
+ return sock
+ except socket.error as _:
+ err = _
+ if sock is not None:
+ sock.close()
+ if err is not None:
+ raise err
+ else:
+ raise socket.error('getaddrinfo returns an empty list')
+else:
+ compat_socket_create_connection = socket.create_connection
+
+
+# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
+# See http://bugs.python.org/issue9161 for what is broken
+def workaround_optparse_bug9161():
+ op = optparse.OptionParser()
+ og = optparse.OptionGroup(op, 'foo')
+ try:
+ og.add_option('-t')
+ except TypeError:
+ real_add_option = optparse.OptionGroup.add_option
+
+ def _compat_add_option(self, *args, **kwargs):
+ enc = lambda v: (
+ v.encode('ascii', 'replace') if isinstance(v, compat_str)
+ else v)
+ bargs = [enc(a) for a in args]
+ bkwargs = dict(
+ (k, enc(v)) for k, v in kwargs.items())
+ return real_add_option(self, *bargs, **bkwargs)
+ optparse.OptionGroup.add_option = _compat_add_option
+
+
+if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
+ compat_get_terminal_size = shutil.get_terminal_size
+else:
+ _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
+
+ def compat_get_terminal_size(fallback=(80, 24)):
+ columns = compat_getenv('COLUMNS')
+ if columns:
+ columns = int(columns)
+ else:
+ columns = None
+ lines = compat_getenv('LINES')
+ if lines:
+ lines = int(lines)
+ else:
+ lines = None
+
+ if columns is None or lines is None or columns <= 0 or lines <= 0:
+ try:
+ sp = subprocess.Popen(
+ ['stty', 'size'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out, err = sp.communicate()
+ _lines, _columns = map(int, out.split())
+ except Exception:
+ _columns, _lines = _terminal_size(*fallback)
+
+ if columns is None or columns <= 0:
+ columns = _columns
+ if lines is None or lines <= 0:
+ lines = _lines
+ return _terminal_size(columns, lines)
+
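+# Illustrative (not part of the original module):
+#
+#     columns, lines = compat_get_terminal_size()  # falls back to (80, 24)
+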
+try:
+ itertools.count(start=0, step=1)
+ compat_itertools_count = itertools.count
+except TypeError: # Python 2.6
+ def compat_itertools_count(start=0, step=1):
+ n = start
+ while True:
+ yield n
+ n += step
+
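+# Illustrative (not part of the original module): compat_itertools_count(10, 5)
+# yields 10, 15, 20, ... everywhere; Python 2.6's itertools.count lacked the
+# step argument.
+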
+if sys.version_info >= (3, 0):
+ from tokenize import tokenize as compat_tokenize_tokenize
+else:
+ from tokenize import generate_tokens as compat_tokenize_tokenize
+
+
+try:
+ struct.pack('!I', 0)
+except TypeError:
+ # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
+ # See https://bugs.python.org/issue19099
+ def compat_struct_pack(spec, *args):
+ if isinstance(spec, compat_str):
+ spec = spec.encode('ascii')
+ return struct.pack(spec, *args)
+
+ def compat_struct_unpack(spec, *args):
+ if isinstance(spec, compat_str):
+ spec = spec.encode('ascii')
+ return struct.unpack(spec, *args)
+
+ class compat_Struct(struct.Struct):
+ def __init__(self, fmt):
+ if isinstance(fmt, compat_str):
+ fmt = fmt.encode('ascii')
+ super(compat_Struct, self).__init__(fmt)
+else:
+ compat_struct_pack = struct.pack
+ compat_struct_unpack = struct.unpack
+ if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
+ class compat_Struct(struct.Struct):
+ def unpack(self, string):
+ if not isinstance(string, buffer): # noqa: F821
+ string = buffer(string) # noqa: F821
+ return super(compat_Struct, self).unpack(string)
+ else:
+ compat_Struct = struct.Struct
+
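+# Illustrative (not part of the original module):
+#
+#     compat_struct_pack('!I', 1)                      # b'\x00\x00\x00\x01'
+#     compat_struct_unpack('!I', b'\x00\x00\x00\x01')  # (1,)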
+
+try:
+ from future_builtins import zip as compat_zip
+except ImportError:  # Python < 2.6 or Python 3
+ try:
+        from itertools import izip as compat_zip  # Python 2
+ except ImportError:
+ compat_zip = zip
+
+
+if sys.version_info < (3, 3):
+ def compat_b64decode(s, *args, **kwargs):
+ if isinstance(s, compat_str):
+ s = s.encode('ascii')
+ return base64.b64decode(s, *args, **kwargs)
+else:
+ compat_b64decode = base64.b64decode
+
+
+if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
+ # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
+ # names, see the original PyPy issue [1] and the hypervideo one [2].
+ # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
+ # 2. https://github.com/ytdl-org/youtube-dl/pull/4392
+ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+ real = ctypes.WINFUNCTYPE(*args, **kwargs)
+
+ def resf(tpl, *args, **kwargs):
+ funcname, dll = tpl
+ return real((str(funcname), dll), *args, **kwargs)
+
+ return resf
+else:
+ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+ return ctypes.WINFUNCTYPE(*args, **kwargs)
+
+
+__all__ = [
+ 'compat_HTMLParseError',
+ 'compat_HTMLParser',
+ 'compat_HTTPError',
+ 'compat_Struct',
+ 'compat_b64decode',
+ 'compat_basestring',
+ 'compat_chr',
+ 'compat_cookiejar',
+ 'compat_cookiejar_Cookie',
+ 'compat_cookies',
+ 'compat_cookies_SimpleCookie',
+ 'compat_ctypes_WINFUNCTYPE',
+ 'compat_etree_Element',
+ 'compat_etree_fromstring',
+ 'compat_etree_register_namespace',
+ 'compat_expanduser',
+ 'compat_get_terminal_size',
+ 'compat_getenv',
+ 'compat_getpass',
+ 'compat_html_entities',
+ 'compat_html_entities_html5',
+ 'compat_http_client',
+ 'compat_http_server',
+ 'compat_input',
+ 'compat_integer_types',
+ 'compat_itertools_count',
+ 'compat_kwargs',
+ 'compat_numeric_types',
+ 'compat_ord',
+ 'compat_os_name',
+ 'compat_parse_qs',
+ 'compat_print',
+ 'compat_realpath',
+ 'compat_setenv',
+ 'compat_shlex_quote',
+ 'compat_shlex_split',
+ 'compat_socket_create_connection',
+ 'compat_str',
+ 'compat_struct_pack',
+ 'compat_struct_unpack',
+ 'compat_subprocess_get_DEVNULL',
+ 'compat_tokenize_tokenize',
+ 'compat_urllib_error',
+ 'compat_urllib_parse',
+ 'compat_urllib_parse_unquote',
+ 'compat_urllib_parse_unquote_plus',
+ 'compat_urllib_parse_unquote_to_bytes',
+ 'compat_urllib_parse_urlencode',
+ 'compat_urllib_parse_urlparse',
+ 'compat_urllib_request',
+ 'compat_urllib_request_DataHandler',
+ 'compat_urllib_response',
+ 'compat_urlparse',
+ 'compat_urlretrieve',
+ 'compat_xml_parse_error',
+ 'compat_xpath',
+ 'compat_zip',
+ 'workaround_optparse_bug9161',
+]
diff --git a/hypervideo_dl/downloader/__init__.py b/hypervideo_dl/downloader/__init__.py
new file mode 100644
index 0000000..2e485df
--- /dev/null
+++ b/hypervideo_dl/downloader/__init__.py
@@ -0,0 +1,61 @@
+from __future__ import unicode_literals
+
+from .common import FileDownloader
+from .f4m import F4mFD
+from .hls import HlsFD
+from .http import HttpFD
+from .rtmp import RtmpFD
+from .dash import DashSegmentsFD
+from .rtsp import RtspFD
+from .ism import IsmFD
+from .external import (
+ get_external_downloader,
+ FFmpegFD,
+)
+
+from ..utils import (
+ determine_protocol,
+)
+
+PROTOCOL_MAP = {
+ 'rtmp': RtmpFD,
+ 'm3u8_native': HlsFD,
+ 'm3u8': FFmpegFD,
+ 'mms': RtspFD,
+ 'rtsp': RtspFD,
+ 'f4m': F4mFD,
+ 'http_dash_segments': DashSegmentsFD,
+ 'ism': IsmFD,
+}
+
+
+def get_suitable_downloader(info_dict, params={}):
+ """Get the downloader class that can handle the info dict."""
+ protocol = determine_protocol(info_dict)
+ info_dict['protocol'] = protocol
+
+ # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
+ # return FFmpegFD
+
+ external_downloader = params.get('external_downloader')
+ if external_downloader is not None:
+ ed = get_external_downloader(external_downloader)
+ if ed.can_download(info_dict):
+ return ed
+
+ if protocol.startswith('m3u8') and info_dict.get('is_live'):
+ return FFmpegFD
+
+ if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
+ return HlsFD
+
+ if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
+ return FFmpegFD
+
+ return PROTOCOL_MAP.get(protocol, HttpFD)
+
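+# Illustrative (not part of the original module): a plain HTTPS URL has no
+# special protocol and falls through to HttpFD, e.g.:
+#
+#     get_suitable_downloader({'url': 'https://example.com/video.mp4'})  # HttpFD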
+
+__all__ = [
+ 'get_suitable_downloader',
+ 'FileDownloader',
+]
diff --git a/hypervideo_dl/downloader/common.py b/hypervideo_dl/downloader/common.py
new file mode 100644
index 0000000..d023168
--- /dev/null
+++ b/hypervideo_dl/downloader/common.py
@@ -0,0 +1,391 @@
+from __future__ import division, unicode_literals
+
+import os
+import re
+import sys
+import time
+import random
+
+from ..compat import compat_os_name
+from ..utils import (
+ decodeArgument,
+ encodeFilename,
+ error_to_compat_str,
+ format_bytes,
+ shell_quote,
+ timeconvert,
+)
+
+
+class FileDownloader(object):
+ """File Downloader class.
+
+    File downloader objects are the ones responsible for downloading the
+    actual video file and writing it to disk.
+
+ File downloaders accept a lot of parameters. In order not to saturate
+ the object constructor with arguments, it receives a dictionary of
+ options instead.
+
+ Available options:
+
+ verbose: Print additional info to stdout.
+ quiet: Do not print messages to stdout.
+ ratelimit: Download speed limit, in bytes/sec.
+    retries:            Number of times to retry on HTTP error 5xx.
+ buffersize: Size of download buffer in bytes.
+ noresizebuffer: Do not automatically resize the download buffer.
+ continuedl: Try to continue downloads if possible.
+ noprogress: Do not print the progress bar.
+ logtostderr: Log messages to stderr instead of stdout.
+ consoletitle: Display progress in console window's titlebar.
+ nopart: Do not use temporary .part files.
+ updatetime: Use the Last-modified header to set output file timestamps.
+ test: Download only first bytes to test the downloader.
+    min_filesize:       Skip files smaller than this size.
+    max_filesize:       Skip files larger than this size.
+ xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
+ external_downloader_args: A list of additional command-line arguments for the
+ external downloader.
+ hls_use_mpegts: Use the mpegts container for HLS videos.
+ http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
+ useful for bypassing bandwidth throttling imposed by
+ a webserver (experimental)
+
+ Subclasses of this one must re-define the real_download method.
+ """
+
+ _TEST_FILE_SIZE = 10241
+ params = None
+
+ def __init__(self, ydl, params):
+ """Create a FileDownloader object with the given options."""
+ self.ydl = ydl
+ self._progress_hooks = []
+ self.params = params
+ self.add_progress_hook(self.report_progress)
+
+ @staticmethod
+ def format_seconds(seconds):
+ (mins, secs) = divmod(seconds, 60)
+ (hours, mins) = divmod(mins, 60)
+ if hours > 99:
+ return '--:--:--'
+ if hours == 0:
+ return '%02d:%02d' % (mins, secs)
+ else:
+ return '%02d:%02d:%02d' % (hours, mins, secs)
+
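+    # Illustrative (not part of the original class):
+    #   format_seconds(61)     -> '01:01'
+    #   format_seconds(3661)   -> '01:01:01'
+    #   format_seconds(360001) -> '--:--:--'  (more than 99 hours)
+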
+ @staticmethod
+ def calc_percent(byte_counter, data_len):
+ if data_len is None:
+ return None
+ return float(byte_counter) / float(data_len) * 100.0
+
+ @staticmethod
+ def format_percent(percent):
+ if percent is None:
+ return '---.-%'
+ return '%6s' % ('%3.1f%%' % percent)
+
+ @staticmethod
+ def calc_eta(start, now, total, current):
+ if total is None:
+ return None
+ if now is None:
+ now = time.time()
+ dif = now - start
+ if current == 0 or dif < 0.001: # One millisecond
+ return None
+ rate = float(current) / dif
+ return int((float(total) - float(current)) / rate)
+
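+    # Illustrative (not part of the original class): 250 of 1000 bytes
+    # downloaded after 10 seconds gives a rate of 25 B/s, so
+    # calc_eta(0, 10, 1000, 250) == 30 (seconds for the remaining 750 bytes).
+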
+ @staticmethod
+ def format_eta(eta):
+ if eta is None:
+ return '--:--'
+ return FileDownloader.format_seconds(eta)
+
+ @staticmethod
+ def calc_speed(start, now, bytes):
+ dif = now - start
+ if bytes == 0 or dif < 0.001: # One millisecond
+ return None
+ return float(bytes) / dif
+
+ @staticmethod
+ def format_speed(speed):
+ if speed is None:
+ return '%10s' % '---b/s'
+ return '%10s' % ('%s/s' % format_bytes(speed))
+
+ @staticmethod
+ def format_retries(retries):
+ return 'inf' if retries == float('inf') else '%.0f' % retries
+
+ @staticmethod
+ def best_block_size(elapsed_time, bytes):
+ new_min = max(bytes / 2.0, 1.0)
+ new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
+ if elapsed_time < 0.001:
+ return int(new_max)
+ rate = bytes / elapsed_time
+ if rate > new_max:
+ return int(new_max)
+ if rate < new_min:
+ return int(new_min)
+ return int(rate)
+
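+    # Illustrative (not part of the original class): after reading 4096 bytes
+    # in 1 s, best_block_size(1.0, 4096) returns 4096, i.e. the measured rate
+    # clamped to [bytes / 2, min(bytes * 2, 4 MiB)].
+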
+ @staticmethod
+ def parse_bytes(bytestr):
+ """Parse a string indicating a byte quantity into an integer."""
+ matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
+ if matchobj is None:
+ return None
+ number = float(matchobj.group(1))
+ multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
+ return int(round(number * multiplier))
+
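+    # Illustrative (not part of the original class):
+    #   parse_bytes('500k')  -> 512000
+    #   parse_bytes('10.5M') -> 11010048
+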
+ def to_screen(self, *args, **kargs):
+ self.ydl.to_screen(*args, **kargs)
+
+ def to_stderr(self, message):
+ self.ydl.to_screen(message)
+
+ def to_console_title(self, message):
+ self.ydl.to_console_title(message)
+
+ def trouble(self, *args, **kargs):
+ self.ydl.trouble(*args, **kargs)
+
+ def report_warning(self, *args, **kargs):
+ self.ydl.report_warning(*args, **kargs)
+
+ def report_error(self, *args, **kargs):
+ self.ydl.report_error(*args, **kargs)
+
+ def slow_down(self, start_time, now, byte_counter):
+ """Sleep if the download speed is over the rate limit."""
+ rate_limit = self.params.get('ratelimit')
+ if rate_limit is None or byte_counter == 0:
+ return
+ if now is None:
+ now = time.time()
+ elapsed = now - start_time
+ if elapsed <= 0.0:
+ return
+ speed = float(byte_counter) / elapsed
+ if speed > rate_limit:
+ sleep_time = float(byte_counter) / rate_limit - elapsed
+ if sleep_time > 0:
+ time.sleep(sleep_time)
+
+ def temp_name(self, filename):
+ """Returns a temporary filename for the given filename."""
+ if self.params.get('nopart', False) or filename == '-' or \
+ (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
+ return filename
+ return filename + '.part'
+
+ def undo_temp_name(self, filename):
+ if filename.endswith('.part'):
+ return filename[:-len('.part')]
+ return filename
+
+ def ytdl_filename(self, filename):
+ return filename + '.ytdl'
+
+ def try_rename(self, old_filename, new_filename):
+ try:
+ if old_filename == new_filename:
+ return
+ os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
+ except (IOError, OSError) as err:
+ self.report_error('unable to rename file: %s' % error_to_compat_str(err))
+
+ def try_utime(self, filename, last_modified_hdr):
+ """Try to set the last-modified time of the given file."""
+ if last_modified_hdr is None:
+ return
+ if not os.path.isfile(encodeFilename(filename)):
+ return
+        filetime = timeconvert(last_modified_hdr)
+ if filetime is None:
+ return filetime
+ # Ignore obviously invalid dates
+ if filetime == 0:
+ return
+ try:
+ os.utime(filename, (time.time(), filetime))
+ except Exception:
+ pass
+ return filetime
+
+ def report_destination(self, filename):
+ """Report destination filename."""
+ self.to_screen('[download] Destination: ' + filename)
+
+ def _report_progress_status(self, msg, is_last_line=False):
+ fullmsg = '[download] ' + msg
+ if self.params.get('progress_with_newline', False):
+ self.to_screen(fullmsg)
+ else:
+ if compat_os_name == 'nt':
+ prev_len = getattr(self, '_report_progress_prev_line_length',
+ 0)
+ if prev_len > len(fullmsg):
+ fullmsg += ' ' * (prev_len - len(fullmsg))
+ self._report_progress_prev_line_length = len(fullmsg)
+ clear_line = '\r'
+ else:
+ clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
+ self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
+ self.to_console_title('hypervideo ' + msg)
+
+ def report_progress(self, s):
+ if s['status'] == 'finished':
+ if self.params.get('noprogress', False):
+ self.to_screen('[download] Download completed')
+ else:
+ msg_template = '100%%'
+ if s.get('total_bytes') is not None:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template += ' of %(_total_bytes_str)s'
+ if s.get('elapsed') is not None:
+ s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+ msg_template += ' in %(_elapsed_str)s'
+ self._report_progress_status(
+ msg_template % s, is_last_line=True)
+
+ if self.params.get('noprogress'):
+ return
+
+ if s['status'] != 'downloading':
+ return
+
+ if s.get('eta') is not None:
+ s['_eta_str'] = self.format_eta(s['eta'])
+ else:
+ s['_eta_str'] = 'Unknown ETA'
+
+ if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
+ s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
+ elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
+ s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
+ else:
+ if s.get('downloaded_bytes') == 0:
+ s['_percent_str'] = self.format_percent(0)
+ else:
+ s['_percent_str'] = 'Unknown %'
+
+ if s.get('speed') is not None:
+ s['_speed_str'] = self.format_speed(s['speed'])
+ else:
+ s['_speed_str'] = 'Unknown speed'
+
+ if s.get('total_bytes') is not None:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
+ elif s.get('total_bytes_estimate') is not None:
+ s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
+ msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
+ else:
+ if s.get('downloaded_bytes') is not None:
+ s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
+ if s.get('elapsed'):
+ s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+ msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
+ else:
+ msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
+ else:
+            msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'
+
+ self._report_progress_status(msg_template % s)
+
+ def report_resuming_byte(self, resume_len):
+ """Report attempt to resume at given byte."""
+ self.to_screen('[download] Resuming download at byte %s' % resume_len)
+
+ def report_retry(self, err, count, retries):
+ """Report retry in case of HTTP error 5xx"""
+ self.to_screen(
+ '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
+ % (error_to_compat_str(err), count, self.format_retries(retries)))
+
+ def report_file_already_downloaded(self, file_name):
+ """Report file has already been fully downloaded."""
+ try:
+ self.to_screen('[download] %s has already been downloaded' % file_name)
+ except UnicodeEncodeError:
+ self.to_screen('[download] The file has already been downloaded')
+
+ def report_unable_to_resume(self):
+ """Report it was impossible to resume download."""
+ self.to_screen('[download] Unable to resume')
+
+ def download(self, filename, info_dict):
+ """Download to a filename using the info from info_dict
+ Return True on success and False otherwise
+ """
+
+ nooverwrites_and_exists = (
+ self.params.get('nooverwrites', False)
+ and os.path.exists(encodeFilename(filename))
+ )
+
+ if not hasattr(filename, 'write'):
+ continuedl_and_exists = (
+ self.params.get('continuedl', True)
+ and os.path.isfile(encodeFilename(filename))
+ and not self.params.get('nopart', False)
+ )
+
+ # Check file already present
+ if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
+ self.report_file_already_downloaded(filename)
+ self._hook_progress({
+ 'filename': filename,
+ 'status': 'finished',
+ 'total_bytes': os.path.getsize(encodeFilename(filename)),
+ })
+ return True
+
+ min_sleep_interval = self.params.get('sleep_interval')
+ if min_sleep_interval:
+ max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
+ sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
+ self.to_screen(
+ '[download] Sleeping %s seconds...' % (
+ int(sleep_interval) if sleep_interval.is_integer()
+ else '%.2f' % sleep_interval))
+ time.sleep(sleep_interval)
+
+ return self.real_download(filename, info_dict)
+
+ def real_download(self, filename, info_dict):
+ """Real download process. Redefine in subclasses."""
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def _hook_progress(self, status):
+ for ph in self._progress_hooks:
+ ph(status)
+
+ def add_progress_hook(self, ph):
+        # See YoutubeDL.py (search for progress_hooks) for a description of
+        # this interface
+ self._progress_hooks.append(ph)
+
+ def _debug_cmd(self, args, exe=None):
+ if not self.params.get('verbose', False):
+ return
+
+ str_args = [decodeArgument(a) for a in args]
+
+ if exe is None:
+ exe = os.path.basename(str_args[0])
+
+ self.to_screen('[debug] %s command line: %s' % (
+ exe, shell_quote(str_args)))
diff --git a/hypervideo_dl/downloader/dash.py b/hypervideo_dl/downloader/dash.py
new file mode 100644
index 0000000..c6d674b
--- /dev/null
+++ b/hypervideo_dl/downloader/dash.py
@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+from .fragment import FragmentFD
+from ..compat import compat_urllib_error
+from ..utils import (
+ DownloadError,
+ urljoin,
+)
+
+
+class DashSegmentsFD(FragmentFD):
+ """
+ Download segments in a DASH manifest
+ """
+
+ FD_NAME = 'dashsegments'
+
+ def real_download(self, filename, info_dict):
+ fragment_base_url = info_dict.get('fragment_base_url')
+ fragments = info_dict['fragments'][:1] if self.params.get(
+ 'test', False) else info_dict['fragments']
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': len(fragments),
+ }
+
+ self._prepare_and_start_frag_download(ctx)
+
+ fragment_retries = self.params.get('fragment_retries', 0)
+ skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+ frag_index = 0
+ for i, fragment in enumerate(fragments):
+ frag_index += 1
+ if frag_index <= ctx['fragment_index']:
+ continue
+ # In DASH, the first segment contains necessary headers to
+ # generate a valid MP4 file, so always abort for the first segment
+ fatal = i == 0 or not skip_unavailable_fragments
+ count = 0
+ while count <= fragment_retries:
+ try:
+ fragment_url = fragment.get('url')
+ if not fragment_url:
+ assert fragment_base_url
+ fragment_url = urljoin(fragment_base_url, fragment['path'])
+ success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
+ if not success:
+ return False
+ self._append_fragment(ctx, frag_content)
+ break
+ except compat_urllib_error.HTTPError as err:
+                    # YouTube often returns a 404 HTTP error for a fragment, causing the
+                    # whole download to fail. However, if the same fragment is immediately
+                    # retried with the same request data, this usually succeeds (1-2
+                    # attempts are usually enough), allowing the whole file to be
+                    # downloaded successfully.
+ # To be future-proof we will retry all fragments that fail with any
+ # HTTP error.
+ count += 1
+ if count <= fragment_retries:
+ self.report_retry_fragment(err, frag_index, count, fragment_retries)
+ except DownloadError:
+ # Don't retry fragment if error occurred during HTTP downloading
+ # itself since it has own retry settings
+ if not fatal:
+ self.report_skip_fragment(frag_index)
+ break
+ raise
+
+ if count > fragment_retries:
+ if not fatal:
+ self.report_skip_fragment(frag_index)
+ continue
+ self.report_error('giving up after %s fragment retries' % fragment_retries)
+ return False
+
+ self._finish_frag_download(ctx)
+
+ return True
diff --git a/hypervideo_dl/downloader/external.py b/hypervideo_dl/downloader/external.py
new file mode 100644
index 0000000..c31f891
--- /dev/null
+++ b/hypervideo_dl/downloader/external.py
@@ -0,0 +1,371 @@
+from __future__ import unicode_literals
+
+import os.path
+import re
+import subprocess
+import sys
+import time
+
+from .common import FileDownloader
+from ..compat import (
+ compat_setenv,
+ compat_str,
+)
+from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
+from ..utils import (
+ cli_option,
+ cli_valueless_option,
+ cli_bool_option,
+ cli_configuration_args,
+ encodeFilename,
+ encodeArgument,
+ handle_youtubedl_headers,
+ check_executable,
+ is_outdated_version,
+)
+
+
+class ExternalFD(FileDownloader):
+ def real_download(self, filename, info_dict):
+ self.report_destination(filename)
+ tmpfilename = self.temp_name(filename)
+
+ try:
+ started = time.time()
+ retval = self._call_downloader(tmpfilename, info_dict)
+ except KeyboardInterrupt:
+ if not info_dict.get('is_live'):
+ raise
+ # Cancelling a live stream download should be considered a correct
+ # and expected termination, so all postprocessing should take place
+ retval = 0
+ self.to_screen('[%s] Interrupted by user' % self.get_basename())
+
+ if retval == 0:
+ status = {
+ 'filename': filename,
+ 'status': 'finished',
+ 'elapsed': time.time() - started,
+ }
+ if filename != '-':
+ fsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
+ self.try_rename(tmpfilename, filename)
+ status.update({
+ 'downloaded_bytes': fsize,
+ 'total_bytes': fsize,
+ })
+ self._hook_progress(status)
+ return True
+ else:
+ self.to_stderr('\n')
+ self.report_error('%s exited with code %d' % (
+ self.get_basename(), retval))
+ return False
+
+ @classmethod
+ def get_basename(cls):
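+ # Derive the downloader name from the class name, e.g. CurlFD -> 'curl'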
+ return cls.__name__[:-2].lower()
+
+ @property
+ def exe(self):
+ return self.params.get('external_downloader')
+
+ @classmethod
+ def available(cls):
+ return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])
+
+ @classmethod
+ def supports(cls, info_dict):
+ return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
+
+ @classmethod
+ def can_download(cls, info_dict):
+ return cls.available() and cls.supports(info_dict)
+
+ def _option(self, command_option, param):
+ return cli_option(self.params, command_option, param)
+
+ def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
+ return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
+
+ def _valueless_option(self, command_option, param, expected_value=True):
+ return cli_valueless_option(self.params, command_option, param, expected_value)
+
+ def _configuration_args(self, default=[]):
+ return cli_configuration_args(self.params, 'external_downloader_args', default)
+
+ def _call_downloader(self, tmpfilename, info_dict):
+ """ Either overwrite this or implement _make_cmd """
+ cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
+
+ self._debug_cmd(cmd)
+
+ p = subprocess.Popen(
+ cmd, stderr=subprocess.PIPE)
+ _, stderr = p.communicate()
+ if p.returncode != 0:
+ self.to_stderr(stderr.decode('utf-8', 'replace'))
+ return p.returncode
+
+
+class CurlFD(ExternalFD):
+ AVAILABLE_OPT = '-V'
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = [self.exe, '--location', '-o', tmpfilename]
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
+ cmd += self._valueless_option('--silent', 'noprogress')
+ cmd += self._valueless_option('--verbose', 'verbose')
+ cmd += self._option('--limit-rate', 'ratelimit')
+ retry = self._option('--retry', 'retries')
+ if len(retry) == 2:
+ if retry[1] in ('inf', 'infinite'):
+ retry[1] = '2147483647'
+ cmd += retry
+ cmd += self._option('--max-filesize', 'max_filesize')
+ cmd += self._option('--interface', 'source_address')
+ cmd += self._option('--proxy', 'proxy')
+ cmd += self._valueless_option('--insecure', 'nocheckcertificate')
+ cmd += self._configuration_args()
+ cmd += ['--', info_dict['url']]
+ return cmd
+
+ def _call_downloader(self, tmpfilename, info_dict):
+ cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
+
+ self._debug_cmd(cmd)
+
+ # curl writes its progress to stderr, so don't capture it.
+ p = subprocess.Popen(cmd)
+ p.communicate()
+ return p.returncode
+
+
+class AxelFD(ExternalFD):
+ AVAILABLE_OPT = '-V'
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = [self.exe, '-o', tmpfilename]
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['-H', '%s: %s' % (key, val)]
+ cmd += self._configuration_args()
+ cmd += ['--', info_dict['url']]
+ return cmd
+
+
+class WgetFD(ExternalFD):
+ AVAILABLE_OPT = '--version'
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += self._option('--limit-rate', 'ratelimit')
+ retry = self._option('--tries', 'retries')
+ if len(retry) == 2:
+ if retry[1] in ('inf', 'infinite'):
+ retry[1] = '0'
+ cmd += retry
+ cmd += self._option('--bind-address', 'source_address')
+ cmd += self._option('--proxy', 'proxy')
+ cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
+ cmd += self._configuration_args()
+ cmd += ['--', info_dict['url']]
+ return cmd
+
+
+class Aria2cFD(ExternalFD):
+ AVAILABLE_OPT = '-v'
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = [self.exe, '-c']
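+ # '-c' tells aria2c to continue a partially downloaded file; the
+ # defaults below can be overridden via external downloader args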
+ cmd += self._configuration_args([
+ '--min-split-size', '1M', '--max-connection-per-server', '4'])
+ dn = os.path.dirname(tmpfilename)
+ if dn:
+ cmd += ['--dir', dn]
+ cmd += ['--out', os.path.basename(tmpfilename)]
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += self._option('--interface', 'source_address')
+ cmd += self._option('--all-proxy', 'proxy')
+ cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
+ cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
+ cmd += ['--', info_dict['url']]
+ return cmd
+
+
+class HttpieFD(ExternalFD):
+ @classmethod
+ def available(cls):
+ return check_executable('http', ['--version'])
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['%s:%s' % (key, val)]
+ return cmd
+
+
+class FFmpegFD(ExternalFD):
+ @classmethod
+ def supports(cls, info_dict):
+ return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
+
+ @classmethod
+ def available(cls):
+ return FFmpegPostProcessor().available
+
+ def _call_downloader(self, tmpfilename, info_dict):
+ url = info_dict['url']
+ ffpp = FFmpegPostProcessor(downloader=self)
+ if not ffpp.available:
+ self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
+ return False
+ ffpp.check_version()
+
+ args = [ffpp.executable, '-y']
+
+ for log_level in ('quiet', 'verbose'):
+ if self.params.get(log_level, False):
+ args += ['-loglevel', log_level]
+ break
+
+ seekable = info_dict.get('_seekable')
+ if seekable is not None:
+ # Setting -seekable prevents ffmpeg from guessing whether the server
+ # supports seeking (by adding the header `Range: bytes=0-`), which
+ # can cause problems in some cases
+ # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
+ # http://trac.ffmpeg.org/ticket/6125#comment:10
+ args += ['-seekable', '1' if seekable else '0']
+
+ args += self._configuration_args()
+
+ # start_time = info_dict.get('start_time') or 0
+ # if start_time:
+ # args += ['-ss', compat_str(start_time)]
+ # end_time = info_dict.get('end_time')
+ # if end_time:
+ # args += ['-t', compat_str(end_time - start_time)]
+
+ if info_dict['http_headers'] and re.match(r'^https?://', url):
+ # A trailing \r\n after each HTTP header is important to prevent this warning from ffmpeg/avconv:
+ # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
+ headers = handle_youtubedl_headers(info_dict['http_headers'])
+ args += [
+ '-headers',
+ ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
+
+ env = None
+ proxy = self.params.get('proxy')
+ if proxy:
+ if not re.match(r'^[\da-zA-Z]+://', proxy):
+ proxy = 'http://%s' % proxy
+
+ if proxy.startswith('socks'):
+ self.report_warning(
+ '%s does not support SOCKS proxies. Downloading is likely to fail. '
+ 'Consider adding --hls-prefer-native to your command.' % self.get_basename())
+
+ # Since December 2015 ffmpeg supports the -http_proxy option (see
+ # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
+ # We could switch to the following code if we were able to detect the version properly
+ # args += ['-http_proxy', proxy]
+ env = os.environ.copy()
+ compat_setenv('HTTP_PROXY', proxy, env=env)
+ compat_setenv('http_proxy', proxy, env=env)
+
+ protocol = info_dict.get('protocol')
+
+ if protocol == 'rtmp':
+ player_url = info_dict.get('player_url')
+ page_url = info_dict.get('page_url')
+ app = info_dict.get('app')
+ play_path = info_dict.get('play_path')
+ tc_url = info_dict.get('tc_url')
+ flash_version = info_dict.get('flash_version')
+ live = info_dict.get('rtmp_live', False)
+ conn = info_dict.get('rtmp_conn')
+ if player_url is not None:
+ args += ['-rtmp_swfverify', player_url]
+ if page_url is not None:
+ args += ['-rtmp_pageurl', page_url]
+ if app is not None:
+ args += ['-rtmp_app', app]
+ if play_path is not None:
+ args += ['-rtmp_playpath', play_path]
+ if tc_url is not None:
+ args += ['-rtmp_tcurl', tc_url]
+ if flash_version is not None:
+ args += ['-rtmp_flashver', flash_version]
+ if live:
+ args += ['-rtmp_live', 'live']
+ if isinstance(conn, list):
+ for entry in conn:
+ args += ['-rtmp_conn', entry]
+ elif isinstance(conn, compat_str):
+ args += ['-rtmp_conn', conn]
+
+ args += ['-i', url, '-c', 'copy']
+
+ if self.params.get('test', False):
+ args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
+
+ if protocol in ('m3u8', 'm3u8_native'):
+ if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
+ args += ['-f', 'mpegts']
+ else:
+ args += ['-f', 'mp4']
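+ # Older ffmpeg (< 3.2) does not automatically repackage ADTS AAC from
+ # MPEG-TS segments for the MP4 container, so the aac_adtstoasc
+ # bitstream filter is applied manually here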
+ if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
+ args += ['-bsf:a', 'aac_adtstoasc']
+ elif protocol == 'rtmp':
+ args += ['-f', 'flv']
+ else:
+ args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
+
+ args = [encodeArgument(opt) for opt in args]
+ args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
+
+ self._debug_cmd(args)
+
+ proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
+ try:
+ retval = proc.wait()
+ except KeyboardInterrupt:
+ # subprocess.run would send SIGKILL to ffmpeg, leaving an mp4 file
+ # that cannot be played; asking ffmpeg to quit instead produces a
+ # playable file (this is mostly useful for live streams). Note that
+ # Windows is not affected and produces playable files either way
+ # (see https://github.com/ytdl-org/youtube-dl/issues/8300).
+ if sys.platform != 'win32':
+ proc.communicate(b'q')
+ raise
+ return retval
+
+
+class AVconvFD(FFmpegFD):
+ pass
+
+
+_BY_NAME = dict(
+ (klass.get_basename(), klass)
+ for name, klass in globals().items()
+ if name.endswith('FD') and name != 'ExternalFD'
+)
+
+
+def list_external_downloaders():
+ return sorted(_BY_NAME.keys())
+
+
+def get_external_downloader(external_downloader):
+ """ Given the name of the executable, see whether we support the given
+ downloader . """
+ # Drop .exe extension on Windows
+ bn = os.path.splitext(os.path.basename(external_downloader))[0]
+ return _BY_NAME[bn]
diff --git a/hypervideo_dl/downloader/f4m.py b/hypervideo_dl/downloader/f4m.py
new file mode 100644
index 0000000..8dd3c2e
--- /dev/null
+++ b/hypervideo_dl/downloader/f4m.py
@@ -0,0 +1,438 @@
+from __future__ import division, unicode_literals
+
+import io
+import itertools
+import time
+
+from .fragment import FragmentFD
+from ..compat import (
+ compat_b64decode,
+ compat_etree_fromstring,
+ compat_urlparse,
+ compat_urllib_error,
+ compat_urllib_parse_urlparse,
+ compat_struct_pack,
+ compat_struct_unpack,
+)
+from ..utils import (
+ fix_xml_ampersands,
+ xpath_text,
+)
+
+
+class DataTruncatedError(Exception):
+ pass
+
+
+class FlvReader(io.BytesIO):
+ """
+ Reader for FLV files.
+ The file format is documented at https://www.adobe.com/devnet/f4v.html
+ """
+
+ def read_bytes(self, n):
+ data = self.read(n)
+ if len(data) < n:
+ raise DataTruncatedError(
+ 'FlvReader error: need %d bytes but only got %d' % (
+ n, len(data)))
+ return data
+
+ # Utility functions for reading numbers and strings
+ def read_unsigned_long_long(self):
+ return compat_struct_unpack('!Q', self.read_bytes(8))[0]
+
+ def read_unsigned_int(self):
+ return compat_struct_unpack('!I', self.read_bytes(4))[0]
+
+ def read_unsigned_char(self):
+ return compat_struct_unpack('!B', self.read_bytes(1))[0]
+
+ def read_string(self):
+ res = b''
+ while True:
+ char = self.read_bytes(1)
+ if char == b'\x00':
+ break
+ res += char
+ return res
+
+ def read_box_info(self):
+ """
+ Read a box and return the info as a tuple: (box_size, box_type, box_data)
+ """
+ real_size = size = self.read_unsigned_int()
+ box_type = self.read_bytes(4)
+ header_end = 8
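+ # A 32-bit size of 1 signals that a 64-bit extended size field follows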
+ if size == 1:
+ real_size = self.read_unsigned_long_long()
+ header_end = 16
+ return real_size, box_type, self.read_bytes(real_size - header_end)
+
+ def read_asrt(self):
+ # version
+ self.read_unsigned_char()
+ # flags
+ self.read_bytes(3)
+ quality_entry_count = self.read_unsigned_char()
+ # QualityEntryCount
+ for i in range(quality_entry_count):
+ self.read_string()
+
+ segment_run_count = self.read_unsigned_int()
+ segments = []
+ for i in range(segment_run_count):
+ first_segment = self.read_unsigned_int()
+ fragments_per_segment = self.read_unsigned_int()
+ segments.append((first_segment, fragments_per_segment))
+
+ return {
+ 'segment_run': segments,
+ }
+
+ def read_afrt(self):
+ # version
+ self.read_unsigned_char()
+ # flags
+ self.read_bytes(3)
+ # time scale
+ self.read_unsigned_int()
+
+ quality_entry_count = self.read_unsigned_char()
+ # QualitySegmentUrlModifiers
+ for i in range(quality_entry_count):
+ self.read_string()
+
+ fragments_count = self.read_unsigned_int()
+ fragments = []
+ for i in range(fragments_count):
+ first = self.read_unsigned_int()
+ first_ts = self.read_unsigned_long_long()
+ duration = self.read_unsigned_int()
+ if duration == 0:
+ discontinuity_indicator = self.read_unsigned_char()
+ else:
+ discontinuity_indicator = None
+ fragments.append({
+ 'first': first,
+ 'ts': first_ts,
+ 'duration': duration,
+ 'discontinuity_indicator': discontinuity_indicator,
+ })
+
+ return {
+ 'fragments': fragments,
+ }
+
+ def read_abst(self):
+ # version
+ self.read_unsigned_char()
+ # flags
+ self.read_bytes(3)
+
+ self.read_unsigned_int() # BootstrapinfoVersion
+ # Profile,Live,Update,Reserved
+ flags = self.read_unsigned_char()
+ live = flags & 0x20 != 0
+ # time scale
+ self.read_unsigned_int()
+ # CurrentMediaTime
+ self.read_unsigned_long_long()
+ # SmpteTimeCodeOffset
+ self.read_unsigned_long_long()
+
+ self.read_string() # MovieIdentifier
+ server_count = self.read_unsigned_char()
+ # ServerEntryTable
+ for i in range(server_count):
+ self.read_string()
+ quality_count = self.read_unsigned_char()
+ # QualityEntryTable
+ for i in range(quality_count):
+ self.read_string()
+ # DrmData
+ self.read_string()
+ # MetaData
+ self.read_string()
+
+ segments_count = self.read_unsigned_char()
+ segments = []
+ for i in range(segments_count):
+ box_size, box_type, box_data = self.read_box_info()
+ assert box_type == b'asrt'
+ segment = FlvReader(box_data).read_asrt()
+ segments.append(segment)
+ fragments_run_count = self.read_unsigned_char()
+ fragments = []
+ for i in range(fragments_run_count):
+ box_size, box_type, box_data = self.read_box_info()
+ assert box_type == b'afrt'
+ fragments.append(FlvReader(box_data).read_afrt())
+
+ return {
+ 'segments': segments,
+ 'fragments': fragments,
+ 'live': live,
+ }
+
+ def read_bootstrap_info(self):
+ total_size, box_type, box_data = self.read_box_info()
+ assert box_type == b'abst'
+ return FlvReader(box_data).read_abst()
+
+
+def read_bootstrap_info(bootstrap_bytes):
+ return FlvReader(bootstrap_bytes).read_bootstrap_info()
+
+
+def build_fragments_list(boot_info):
+ """ Return a list of (segment, fragment) for each fragment in the video """
+ res = []
+ segment_run_table = boot_info['segments'][0]
+ fragment_run_entry_table = boot_info['fragments'][0]['fragments']
+ first_frag_number = fragment_run_entry_table[0]['first']
+ fragments_counter = itertools.count(first_frag_number)
+ for segment, fragments_count in segment_run_table['segment_run']:
+ # In some live HDS streams (for example Rai), `fragments_count` is
+ # abnormal, causing out-of-memory errors. It's OK to change the
+ # number of fragments for live streams, as they are updated periodically
+ if fragments_count == 4294967295 and boot_info['live']:
+ fragments_count = 2
+ for _ in range(fragments_count):
+ res.append((segment, next(fragments_counter)))
+
+ if boot_info['live']:
+ res = res[-2:]
+
+ return res
+
+
+def write_unsigned_int(stream, val):
+ stream.write(compat_struct_pack('!I', val))
+
+
+def write_unsigned_int_24(stream, val):
+ stream.write(compat_struct_pack('!I', val)[1:])
+
+
+def write_flv_header(stream):
+ """Writes the FLV header to stream"""
+ # FLV header
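+ # Layout: "FLV" signature + version 1, a flags byte (0x05 = audio
+ # and video present), the 4-byte header size (9) and the zero
+ # PreviousTagSize0 field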
+ stream.write(b'FLV\x01')
+ stream.write(b'\x05')
+ stream.write(b'\x00\x00\x00\x09')
+ stream.write(b'\x00\x00\x00\x00')
+
+
+def write_metadata_tag(stream, metadata):
+ """Writes optional metadata tag to stream"""
+ SCRIPT_TAG = b'\x12'
+ FLV_TAG_HEADER_LEN = 11
+
+ if metadata:
+ stream.write(SCRIPT_TAG)
+ write_unsigned_int_24(stream, len(metadata))
+ stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
+ stream.write(metadata)
+ write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
+
+
+def remove_encrypted_media(media):
+ return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib
+ and 'drmAdditionalHeaderSetId' not in e.attrib,
+ media))
+
+
+def _add_ns(prop, ver=1):
+ return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
+
+
+def get_base_url(manifest):
+ base_url = xpath_text(
+ manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
+ 'base URL', default=None)
+ if base_url:
+ base_url = base_url.strip()
+ return base_url
+
+
+class F4mFD(FragmentFD):
+ """
+ A downloader for f4m manifests or AdobeHDS.
+ """
+
+ FD_NAME = 'f4m'
+
+ def _get_unencrypted_media(self, doc):
+ media = doc.findall(_add_ns('media'))
+ if not media:
+ self.report_error('No media found')
+ for e in (doc.findall(_add_ns('drmAdditionalHeader'))
+ + doc.findall(_add_ns('drmAdditionalHeaderSet'))):
+ # If the id attribute is missing, the header applies to all media
+ # nodes that lack a drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
+ if 'id' not in e.attrib:
+ self.report_error('Missing ID in f4m DRM')
+ media = remove_encrypted_media(media)
+ if not media:
+ self.report_error('Unsupported DRM')
+ return media
+
+ def _get_bootstrap_from_url(self, bootstrap_url):
+ bootstrap = self.ydl.urlopen(bootstrap_url).read()
+ return read_bootstrap_info(bootstrap)
+
+ def _update_live_fragments(self, bootstrap_url, latest_fragment):
+ fragments_list = []
+ retries = 30
+ while (not fragments_list) and (retries > 0):
+ boot_info = self._get_bootstrap_from_url(bootstrap_url)
+ fragments_list = build_fragments_list(boot_info)
+ fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
+ if not fragments_list:
+ # Retry after a while
+ time.sleep(5.0)
+ retries -= 1
+
+ if not fragments_list:
+ self.report_error('Failed to update fragments')
+
+ return fragments_list
+
+ def _parse_bootstrap_node(self, node, base_url):
+ # Sometimes non-empty inline bootstrap info can be specified along
+ # with a bootstrap url attribute (e.g. dummy inline bootstrap info
+ # contains whitespace characters in [1]). We prefer the bootstrap
+ # url over inline bootstrap info when both are present.
+ # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
+ bootstrap_url = node.get('url')
+ if bootstrap_url:
+ bootstrap_url = compat_urlparse.urljoin(
+ base_url, bootstrap_url)
+ boot_info = self._get_bootstrap_from_url(bootstrap_url)
+ else:
+ bootstrap_url = None
+ bootstrap = compat_b64decode(node.text)
+ boot_info = read_bootstrap_info(bootstrap)
+ return boot_info, bootstrap_url
+
+ def real_download(self, filename, info_dict):
+ man_url = info_dict['url']
+ requested_bitrate = info_dict.get('tbr')
+ self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
+
+ urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
+ man_url = urlh.geturl()
+ # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+ # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
+ # and https://github.com/ytdl-org/youtube-dl/issues/7823)
+ manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()
+
+ doc = compat_etree_fromstring(manifest)
+ formats = [(int(f.attrib.get('bitrate', -1)), f)
+ for f in self._get_unencrypted_media(doc)]
+ if requested_bitrate is None or len(formats) == 1:
+ # get the best format
+ formats = sorted(formats, key=lambda f: f[0])
+ rate, media = formats[-1]
+ else:
+ rate, media = list(filter(
+ lambda f: int(f[0]) == requested_bitrate, formats))[0]
+
+ # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
+ man_base_url = get_base_url(doc) or man_url
+
+ base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
+ bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
+ boot_info, bootstrap_url = self._parse_bootstrap_node(
+ bootstrap_node, man_base_url)
+ live = boot_info['live']
+ metadata_node = media.find(_add_ns('metadata'))
+ if metadata_node is not None:
+ metadata = compat_b64decode(metadata_node.text)
+ else:
+ metadata = None
+
+ fragments_list = build_fragments_list(boot_info)
+ test = self.params.get('test', False)
+ if test:
+ # We only download the first fragment
+ fragments_list = fragments_list[:1]
+ total_frags = len(fragments_list)
+ # For some akamai manifests we'll need to add a query to the fragment url
+ akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': total_frags,
+ 'live': live,
+ }
+
+ self._prepare_frag_download(ctx)
+
+ dest_stream = ctx['dest_stream']
+
+ if ctx['complete_frags_downloaded_bytes'] == 0:
+ write_flv_header(dest_stream)
+ if not live:
+ write_metadata_tag(dest_stream, metadata)
+
+ base_url_parsed = compat_urllib_parse_urlparse(base_url)
+
+ self._start_frag_download(ctx)
+
+ frag_index = 0
+ while fragments_list:
+ seg_i, frag_i = fragments_list.pop(0)
+ frag_index += 1
+ if frag_index <= ctx['fragment_index']:
+ continue
+ name = 'Seg%d-Frag%d' % (seg_i, frag_i)
+ query = []
+ if base_url_parsed.query:
+ query.append(base_url_parsed.query)
+ if akamai_pv:
+ query.append(akamai_pv.strip(';'))
+ if info_dict.get('extra_param_to_segment_url'):
+ query.append(info_dict['extra_param_to_segment_url'])
+ url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
+ try:
+ success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
+ if not success:
+ return False
+ reader = FlvReader(down_data)
+ while True:
+ try:
+ _, box_type, box_data = reader.read_box_info()
+ except DataTruncatedError:
+ if test:
+ # In tests, segments may be truncated, and thus
+ # FlvReader may not be able to parse the whole
+ # chunk. If so, write the segment as is
+ # See https://github.com/ytdl-org/youtube-dl/issues/9214
+ dest_stream.write(down_data)
+ break
+ raise
+ if box_type == b'mdat':
+ self._append_fragment(ctx, box_data)
+ break
+ except (compat_urllib_error.HTTPError, ) as err:
+ if live and (err.code == 404 or err.code == 410):
+ # We didn't keep up with the live window. Continue
+ # with the next available fragment.
+ msg = 'Fragment %d unavailable' % frag_i
+ self.report_warning(msg)
+ fragments_list = []
+ else:
+ raise
+
+ if not fragments_list and not test and live and bootstrap_url:
+ fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
+ total_frags += len(fragments_list)
+ if fragments_list and (fragments_list[0][1] > frag_i + 1):
+ msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
+ self.report_warning(msg)
+
+ self._finish_frag_download(ctx)
+
+ return True
diff --git a/hypervideo_dl/downloader/fragment.py b/hypervideo_dl/downloader/fragment.py
new file mode 100644
index 0000000..b82e3cf
--- /dev/null
+++ b/hypervideo_dl/downloader/fragment.py
@@ -0,0 +1,279 @@
+from __future__ import division, unicode_literals
+
+import os
+import time
+import json
+
+from .common import FileDownloader
+from .http import HttpFD
+from ..utils import (
+ error_to_compat_str,
+ encodeFilename,
+ sanitize_open,
+ sanitized_Request,
+)
+
+
+class HttpQuietDownloader(HttpFD):
+ def to_screen(self, *args, **kargs):
+ pass
+
+
+class FragmentFD(FileDownloader):
+ """
+ A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
+
+ Available options:
+
+ fragment_retries: Number of times to retry a fragment for HTTP error (DASH
+ and hlsnative only)
+ skip_unavailable_fragments:
+ Skip unavailable fragments (DASH and hlsnative only)
+ keep_fragments: Keep downloaded fragments on disk after downloading is
+ finished
+
+ For each incomplete fragment download, hypervideo keeps a special
+ bookkeeping file with download state and metadata on disk (in the future
+ such files will be used for any incomplete download handled by
+ hypervideo). This file is used to properly handle resuming, to check
+ download file consistency and to detect potential errors. The file has
+ a .ytdl extension and is a standard JSON file of the following format:
+
+ extractor:
+ Dictionary of extractor related data. TBD.
+
+ downloader:
+ Dictionary of downloader related data. May contain following data:
+ current_fragment:
+ Dictionary with current (being downloaded) fragment data:
+ index: 0-based index of current fragment among all fragments
+ fragment_count:
+ Total count of fragments
+
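+ A minimal example, assuming fragment 3 of 120 is currently being
+ downloaded:
+
+ {"downloader": {"current_fragment": {"index": 3}, "fragment_count": 120}}
+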
+ This feature is experimental and file format may change in future.
+ """
+
+ def report_retry_fragment(self, err, frag_index, count, retries):
+ self.to_screen(
+ '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
+ % (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
+
+ def report_skip_fragment(self, frag_index):
+ self.to_screen('[download] Skipping fragment %d...' % frag_index)
+
+ def _prepare_url(self, info_dict, url):
+ headers = info_dict.get('http_headers')
+ return sanitized_Request(url, None, headers) if headers else url
+
+ def _prepare_and_start_frag_download(self, ctx):
+ self._prepare_frag_download(ctx)
+ self._start_frag_download(ctx)
+
+ @staticmethod
+ def __do_ytdl_file(ctx):
+ return not ctx['live'] and ctx['tmpfilename'] != '-'
+
+ def _read_ytdl_file(self, ctx):
+ assert 'ytdl_corrupt' not in ctx
+ stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
+ try:
+ ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
+ except Exception:
+ ctx['ytdl_corrupt'] = True
+ finally:
+ stream.close()
+
+ def _write_ytdl_file(self, ctx):
+ frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
+ downloader = {
+ 'current_fragment': {
+ 'index': ctx['fragment_index'],
+ },
+ }
+ if ctx.get('fragment_count') is not None:
+ downloader['fragment_count'] = ctx['fragment_count']
+ frag_index_stream.write(json.dumps({'downloader': downloader}))
+ frag_index_stream.close()
+
+ def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
+ fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
+ fragment_info_dict = {
+ 'url': frag_url,
+ 'http_headers': headers or info_dict.get('http_headers'),
+ }
+ success = ctx['dl'].download(fragment_filename, fragment_info_dict)
+ if not success:
+ return False, None
+ if fragment_info_dict.get('filetime'):
+ ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
+ down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
+ ctx['fragment_filename_sanitized'] = frag_sanitized
+ frag_content = down.read()
+ down.close()
+ return True, frag_content
+
+ def _append_fragment(self, ctx, frag_content):
+ try:
+ ctx['dest_stream'].write(frag_content)
+ ctx['dest_stream'].flush()
+ finally:
+ if self.__do_ytdl_file(ctx):
+ self._write_ytdl_file(ctx)
+ if not self.params.get('keep_fragments', False):
+ os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
+ del ctx['fragment_filename_sanitized']
+
+ def _prepare_frag_download(self, ctx):
+ if 'live' not in ctx:
+ ctx['live'] = False
+ if not ctx['live']:
+ total_frags_str = '%d' % ctx['total_frags']
+ ad_frags = ctx.get('ad_frags', 0)
+ if ad_frags:
+ total_frags_str += ' (not including %d ad)' % ad_frags
+ else:
+ total_frags_str = 'unknown (live)'
+ self.to_screen(
+ '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
+ self.report_destination(ctx['filename'])
+ dl = HttpQuietDownloader(
+ self.ydl,
+ {
+ 'continuedl': True,
+ 'quiet': True,
+ 'noprogress': True,
+ 'ratelimit': self.params.get('ratelimit'),
+ 'retries': self.params.get('retries', 0),
+ 'nopart': self.params.get('nopart', False),
+ 'test': self.params.get('test', False),
+ }
+ )
+ tmpfilename = self.temp_name(ctx['filename'])
+ open_mode = 'wb'
+ resume_len = 0
+
+ # Establish possible resume length
+ if os.path.isfile(encodeFilename(tmpfilename)):
+ open_mode = 'ab'
+ resume_len = os.path.getsize(encodeFilename(tmpfilename))
+
+ # Should be initialized before ytdl file check
+ ctx.update({
+ 'tmpfilename': tmpfilename,
+ 'fragment_index': 0,
+ })
+
+ if self.__do_ytdl_file(ctx):
+ if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+ self._read_ytdl_file(ctx)
+ is_corrupt = ctx.get('ytdl_corrupt') is True
+ is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
+ if is_corrupt or is_inconsistent:
+ message = (
+ '.ytdl file is corrupt' if is_corrupt else
+ 'Inconsistent state of incomplete fragment download')
+ self.report_warning(
+ '%s. Restarting from the beginning...' % message)
+ ctx['fragment_index'] = resume_len = 0
+ if 'ytdl_corrupt' in ctx:
+ del ctx['ytdl_corrupt']
+ self._write_ytdl_file(ctx)
+ else:
+ self._write_ytdl_file(ctx)
+ assert ctx['fragment_index'] == 0
+
+ dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
+
+ ctx.update({
+ 'dl': dl,
+ 'dest_stream': dest_stream,
+ 'tmpfilename': tmpfilename,
+ # Total complete fragments downloaded so far in bytes
+ 'complete_frags_downloaded_bytes': resume_len,
+ })
+
+ def _start_frag_download(self, ctx):
+ resume_len = ctx['complete_frags_downloaded_bytes']
+ total_frags = ctx['total_frags']
+ # This dict stores the download progress, it's updated by the progress
+ # hook
+ state = {
+ 'status': 'downloading',
+ 'downloaded_bytes': resume_len,
+ 'fragment_index': ctx['fragment_index'],
+ 'fragment_count': total_frags,
+ 'filename': ctx['filename'],
+ 'tmpfilename': ctx['tmpfilename'],
+ }
+
+ start = time.time()
+ ctx.update({
+ 'started': start,
+ # Amount of fragment's bytes downloaded by the time of the previous
+ # frag progress hook invocation
+ 'prev_frag_downloaded_bytes': 0,
+ })
+
+ def frag_progress_hook(s):
+ if s['status'] not in ('downloading', 'finished'):
+ return
+
+ time_now = time.time()
+ state['elapsed'] = time_now - start
+ frag_total_bytes = s.get('total_bytes') or 0
+ if not ctx['live']:
+ estimated_size = (
+ (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
+ / (state['fragment_index'] + 1) * total_frags)
+ state['total_bytes_estimate'] = estimated_size
+
+ if s['status'] == 'finished':
+ state['fragment_index'] += 1
+ ctx['fragment_index'] = state['fragment_index']
+ state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
+ ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
+ ctx['prev_frag_downloaded_bytes'] = 0
+ else:
+ frag_downloaded_bytes = s['downloaded_bytes']
+ state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
+ if not ctx['live']:
+ state['eta'] = self.calc_eta(
+ start, time_now, estimated_size - resume_len,
+ state['downloaded_bytes'] - resume_len)
+ state['speed'] = s.get('speed') or ctx.get('speed')
+ ctx['speed'] = state['speed']
+ ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
+ self._hook_progress(state)
+
+ ctx['dl'].add_progress_hook(frag_progress_hook)
+
+ return start
+
+ def _finish_frag_download(self, ctx):
+ ctx['dest_stream'].close()
+ if self.__do_ytdl_file(ctx):
+ ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
+ if os.path.isfile(ytdl_filename):
+ os.remove(ytdl_filename)
+ elapsed = time.time() - ctx['started']
+
+ if ctx['tmpfilename'] == '-':
+ downloaded_bytes = ctx['complete_frags_downloaded_bytes']
+ else:
+ self.try_rename(ctx['tmpfilename'], ctx['filename'])
+ if self.params.get('updatetime', True):
+ filetime = ctx.get('fragment_filetime')
+ if filetime:
+ try:
+ os.utime(ctx['filename'], (time.time(), filetime))
+ except Exception:
+ pass
+ downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
+
+ self._hook_progress({
+ 'downloaded_bytes': downloaded_bytes,
+ 'total_bytes': downloaded_bytes,
+ 'filename': ctx['filename'],
+ 'status': 'finished',
+ 'elapsed': elapsed,
+ })
diff --git a/hypervideo_dl/downloader/hls.py b/hypervideo_dl/downloader/hls.py
new file mode 100644
index 0000000..7aaebc9
--- /dev/null
+++ b/hypervideo_dl/downloader/hls.py
@@ -0,0 +1,216 @@
+from __future__ import unicode_literals
+
+import re
+import binascii
+try:
+ from Crypto.Cipher import AES
+ can_decrypt_frag = True
+except ImportError:
+ can_decrypt_frag = False
+
+from .fragment import FragmentFD
+from .external import FFmpegFD
+
+from ..compat import (
+ compat_urllib_error,
+ compat_urlparse,
+ compat_struct_pack,
+)
+from ..utils import (
+ parse_m3u8_attributes,
+ update_url_query,
+)
+
+
+class HlsFD(FragmentFD):
+ """ A limited implementation that does not require ffmpeg """
+
+ FD_NAME = 'hlsnative'
+
+ @staticmethod
+ def can_download(manifest, info_dict):
+ UNSUPPORTED_FEATURES = (
+ r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
+ # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
+
+ # The live streams heuristic does not always work (e.g. geo-restricted to Germany:
+ # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
+ # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
+
+ # This heuristic is also incorrect, since segments may not be appended either.
+ # Twitch VODs of finished streams have EXT-X-PLAYLIST-TYPE:EVENT even though
+ # no segments will ever be appended to the end of the playlist.
+ # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
+ # # event media playlists [4]
+ r'#EXT-X-MAP:', # media initialization [5]
+
+ # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
+ # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
+ # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
+ # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
+ # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
+ )
+ check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
+ is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
+ check_results.append(can_decrypt_frag or not is_aes128_enc)
+ check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
+ check_results.append(not info_dict.get('is_live'))
+ return all(check_results)
+
+ def real_download(self, filename, info_dict):
+ man_url = info_dict['url']
+ self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
+
+ urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
+ man_url = urlh.geturl()
+ s = urlh.read().decode('utf-8', 'ignore')
+
+ if not self.can_download(s, info_dict):
+ if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
+ self.report_error('pycrypto not found. Please install it.')
+ return False
+ self.report_warning(
+ 'hlsnative has detected features it does not support, '
+ 'extraction will be delegated to ffmpeg')
+ fd = FFmpegFD(self.ydl, self.params)
+ for ph in self._progress_hooks:
+ fd.add_progress_hook(ph)
+ return fd.real_download(filename, info_dict)
+
+ def is_ad_fragment_start(s):
+ return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
+ or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
+
+ def is_ad_fragment_end(s):
+ return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
+ or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
+
+ media_frags = 0
+ ad_frags = 0
+ ad_frag_next = False
+ for line in s.splitlines():
+ line = line.strip()
+ if not line:
+ continue
+ if line.startswith('#'):
+ if is_ad_fragment_start(line):
+ ad_frag_next = True
+ elif is_ad_fragment_end(line):
+ ad_frag_next = False
+ continue
+ if ad_frag_next:
+ ad_frags += 1
+ continue
+ media_frags += 1
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': media_frags,
+ 'ad_frags': ad_frags,
+ }
+
+ self._prepare_and_start_frag_download(ctx)
+
+ fragment_retries = self.params.get('fragment_retries', 0)
+ skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+ test = self.params.get('test', False)
+
+ extra_query = None
+ extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
+ if extra_param_to_segment_url:
+ extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
+ i = 0
+ media_sequence = 0
+ decrypt_info = {'METHOD': 'NONE'}
+ byte_range = {}
+ frag_index = 0
+ ad_frag_next = False
+ for line in s.splitlines():
+ line = line.strip()
+ if line:
+ if not line.startswith('#'):
+ if ad_frag_next:
+ continue
+ frag_index += 1
+ if frag_index <= ctx['fragment_index']:
+ continue
+ frag_url = (
+ line
+ if re.match(r'^https?://', line)
+ else compat_urlparse.urljoin(man_url, line))
+ if extra_query:
+ frag_url = update_url_query(frag_url, extra_query)
+ count = 0
+ headers = info_dict.get('http_headers', {})
+ if byte_range:
+ headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
+ while count <= fragment_retries:
+ try:
+ success, frag_content = self._download_fragment(
+ ctx, frag_url, info_dict, headers)
+ if not success:
+ return False
+ break
+ except compat_urllib_error.HTTPError as err:
+ # Unavailable (possibly temporary) fragments may be served.
+ # First we retry, then either skip or abort.
+ # See https://github.com/ytdl-org/youtube-dl/issues/10165 and
+ # https://github.com/ytdl-org/youtube-dl/issues/10448.
+ count += 1
+ if count <= fragment_retries:
+ self.report_retry_fragment(err, frag_index, count, fragment_retries)
+ if count > fragment_retries:
+ if skip_unavailable_fragments:
+ i += 1
+ media_sequence += 1
+ self.report_skip_fragment(frag_index)
+ continue
+ self.report_error(
+ 'giving up after %s fragment retries' % fragment_retries)
+ return False
+ if decrypt_info['METHOD'] == 'AES-128':
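+ # Per the HLS spec, when EXT-X-KEY carries no IV the segment's media
+ # sequence number (big-endian, zero-padded to 16 bytes) is used as the IV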
+ iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
+ decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
+ self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
+ # Don't decrypt the content in tests, since the data is explicitly
+ # truncated and is not a valid block size (see
+ # https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care
+ # that the correct data was downloaded, not what it decrypts to.
+ if not test:
+ frag_content = AES.new(
+ decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
+ self._append_fragment(ctx, frag_content)
+ # We only download the first fragment during the test
+ if test:
+ break
+ i += 1
+ media_sequence += 1
+ elif line.startswith('#EXT-X-KEY'):
+ decrypt_url = decrypt_info.get('URI')
+ decrypt_info = parse_m3u8_attributes(line[11:])
+ if decrypt_info['METHOD'] == 'AES-128':
+ if 'IV' in decrypt_info:
+ decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
+ if not re.match(r'^https?://', decrypt_info['URI']):
+ decrypt_info['URI'] = compat_urlparse.urljoin(
+ man_url, decrypt_info['URI'])
+ if extra_query:
+ decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
+ if decrypt_url != decrypt_info['URI']:
+ decrypt_info['KEY'] = None
+ elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
+ media_sequence = int(line[22:])
+ elif line.startswith('#EXT-X-BYTERANGE'):
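+ # The tag value is "<length>[@<offset>]"; with no offset the
+ # sub-range starts where the previous one ended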
+ splitted_byte_range = line[17:].split('@')
+ sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
+ byte_range = {
+ 'start': sub_range_start,
+ 'end': sub_range_start + int(splitted_byte_range[0]),
+ }
+ elif is_ad_fragment_start(line):
+ ad_frag_next = True
+ elif is_ad_fragment_end(line):
+ ad_frag_next = False
+
+ self._finish_frag_download(ctx)
+
+ return True
diff --git a/hypervideo_dl/downloader/http.py b/hypervideo_dl/downloader/http.py
new file mode 100644
index 0000000..d8ac41d
--- /dev/null
+++ b/hypervideo_dl/downloader/http.py
@@ -0,0 +1,364 @@
+from __future__ import unicode_literals
+
+import errno
+import os
+import socket
+import time
+import random
+import re
+
+from .common import FileDownloader
+from ..compat import (
+ compat_str,
+ compat_urllib_error,
+)
+from ..utils import (
+ ContentTooShortError,
+ encodeFilename,
+ int_or_none,
+ sanitize_open,
+ sanitized_Request,
+ write_xattr,
+ XAttrMetadataError,
+ XAttrUnavailableError,
+)
+
+
+class HttpFD(FileDownloader):
+ def real_download(self, filename, info_dict):
+ url = info_dict['url']
+
+ class DownloadContext(dict):
+ __getattr__ = dict.get
+ __setattr__ = dict.__setitem__
+ __delattr__ = dict.__delitem__
+
+ ctx = DownloadContext()
+ ctx.filename = filename
+ ctx.tmpfilename = self.temp_name(filename)
+ ctx.stream = None
+
+ # Do not include the Accept-Encoding header
+ headers = {'Youtubedl-no-compression': 'True'}
+ add_headers = info_dict.get('http_headers')
+ if add_headers:
+ headers.update(add_headers)
+
+ is_test = self.params.get('test', False)
+ chunk_size = self._TEST_FILE_SIZE if is_test else (
+ info_dict.get('downloader_options', {}).get('http_chunk_size')
+ or self.params.get('http_chunk_size') or 0)
+
+ ctx.open_mode = 'wb'
+ ctx.resume_len = 0
+ ctx.data_len = None
+ ctx.block_size = self.params.get('buffersize', 1024)
+ ctx.start_time = time.time()
+ ctx.chunk_size = None
+
+ if self.params.get('continuedl', True):
+ # Establish possible resume length
+ if os.path.isfile(encodeFilename(ctx.tmpfilename)):
+ ctx.resume_len = os.path.getsize(
+ encodeFilename(ctx.tmpfilename))
+
+ ctx.is_resume = ctx.resume_len > 0
+
+ count = 0
+ retries = self.params.get('retries', 0)
+
+ class SucceedDownload(Exception):
+ pass
+
+ class RetryDownload(Exception):
+ def __init__(self, source_error):
+ self.source_error = source_error
+
+ class NextFragment(Exception):
+ pass
+
+ def set_range(req, start, end):
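+ # e.g. start=0, end=10485759 -> "Range: bytes=0-10485759";
+ # with end=None -> "Range: bytes=0-"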
+ range_header = 'bytes=%d-' % start
+ if end:
+ range_header += compat_str(end)
+ req.add_header('Range', range_header)
+
+ def establish_connection():
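+ # Use a slightly randomized chunk size (95-100% of the nominal
+ # value) for each connection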
+ ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
+ if not is_test and chunk_size else chunk_size)
+ if ctx.resume_len > 0:
+ range_start = ctx.resume_len
+ if ctx.is_resume:
+ self.report_resuming_byte(ctx.resume_len)
+ ctx.open_mode = 'ab'
+ elif ctx.chunk_size > 0:
+ range_start = 0
+ else:
+ range_start = None
+ ctx.is_resume = False
+ range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
+ if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
+ range_end = ctx.data_len - 1
+ has_range = range_start is not None
+ ctx.has_range = has_range
+ request = sanitized_Request(url, None, headers)
+ if has_range:
+ set_range(request, range_start, range_end)
+ # Establish connection
+ try:
+ try:
+ ctx.data = self.ydl.urlopen(request)
+ except (compat_urllib_error.URLError, ) as err:
+ # reason may not be available, e.g. for urllib2.HTTPError on python 2.6
+ reason = getattr(err, 'reason', None)
+ if isinstance(reason, socket.timeout):
+ raise RetryDownload(err)
+ raise err
+ # When trying to resume, the Content-Range HTTP header of the response
+ # has to be checked against the value of the requested Range HTTP header.
+ # This is needed for webservers that don't support resuming and serve
+ # the whole file with no Content-Range set in the response despite the
+ # requested Range (see
+ # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
+ if has_range:
+ content_range = ctx.data.headers.get('Content-Range')
+ if content_range:
+ content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
+ # Content-Range is present and matches requested Range, resume is possible
+ if content_range_m:
+ if range_start == int(content_range_m.group(1)):
+ content_range_end = int_or_none(content_range_m.group(2))
+ content_len = int_or_none(content_range_m.group(3))
+ accept_content_len = (
+ # Non-chunked download
+ not ctx.chunk_size
+ # Chunked download and requested piece or
+ # its part is promised to be served
+ or content_range_end == range_end
+ or content_len < range_end)
+ if accept_content_len:
+ ctx.data_len = content_len
+ return
+ # Content-Range is either not present or invalid. Assuming the remote
+ # webserver is trying to send the whole file, resume is not possible,
+ # so we wipe the local file and redownload from scratch
+ self.report_unable_to_resume()
+ ctx.resume_len = 0
+ ctx.open_mode = 'wb'
+ ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
+ return
+ except (compat_urllib_error.HTTPError, ) as err:
+ if err.code == 416:
+ # Unable to resume (requested range not satisfiable)
+ try:
+ # Open the connection again without the range header
+ ctx.data = self.ydl.urlopen(
+ sanitized_Request(url, None, headers))
+ content_length = ctx.data.info()['Content-Length']
+ except (compat_urllib_error.HTTPError, ) as err:
+ if err.code < 500 or err.code >= 600:
+ raise
+ else:
+ # Examine the reported length
+ if (content_length is not None
+ and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
+ # The file had already been fully downloaded.
+ # Explanation of the above condition: in issue #175 it was revealed that
+ # YouTube sometimes adds or removes a few bytes from the end of the file,
+ # changing the file size slightly and causing problems for some users. So
+ # I decided to implement a suggested change and consider the file
+ # completely downloaded if the file size differs less than 100 bytes from
+ # the one in the hard drive.
+ self.report_file_already_downloaded(ctx.filename)
+ self.try_rename(ctx.tmpfilename, ctx.filename)
+ self._hook_progress({
+ 'filename': ctx.filename,
+ 'status': 'finished',
+ 'downloaded_bytes': ctx.resume_len,
+ 'total_bytes': ctx.resume_len,
+ })
+ raise SucceedDownload()
+ else:
+ # The length does not match, we start the download over
+ self.report_unable_to_resume()
+ ctx.resume_len = 0
+ ctx.open_mode = 'wb'
+ return
+ elif err.code < 500 or err.code >= 600:
+ # Unexpected HTTP error
+ raise
+ raise RetryDownload(err)
+ except socket.error as err:
+ if err.errno != errno.ECONNRESET:
+ # Connection reset is no problem, just retry
+ raise
+ raise RetryDownload(err)
+
+ def download():
+ data_len = ctx.data.info().get('Content-length', None)
+
+ # The Range HTTP header may be ignored or unsupported by a webserver
+ # (e.g. extractor/scivee.py, extractor/bambuser.py).
+ # However, for a test we would still like to download just a piece of the file,
+ # so we limit data_len to _TEST_FILE_SIZE and manually control the
+ # block size while downloading.
+ if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
+ data_len = self._TEST_FILE_SIZE
+
+ if data_len is not None:
+ data_len = int(data_len) + ctx.resume_len
+ min_data_len = self.params.get('min_filesize')
+ max_data_len = self.params.get('max_filesize')
+ if min_data_len is not None and data_len < min_data_len:
+ self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+ return False
+ if max_data_len is not None and data_len > max_data_len:
+ self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+ return False
+
+ byte_counter = 0 + ctx.resume_len
+ block_size = ctx.block_size
+ start = time.time()
+
+ # Measure time over the whole while-loop so that slow_down() and best_block_size() work together properly
+ now = None # needed for slow_down() in the first loop run
+ before = start # start measuring
+
+ def retry(e):
+ to_stdout = ctx.tmpfilename == '-'
+ if ctx.stream is not None:
+ if not to_stdout:
+ ctx.stream.close()
+ ctx.stream = None
+ ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
+ raise RetryDownload(e)
+
+ while True:
+ try:
+ # Download and write
+ data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter))
+ # socket.timeout is a subclass of socket.error but may not have
+ # errno set
+ except socket.timeout as e:
+ retry(e)
+ except socket.error as e:
+ # SSLError on python 2 (which inherits from socket.error) may have
+ # no errno set but carry this error message
+ if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out':
+ retry(e)
+ raise
+
+ byte_counter += len(data_block)
+
+ # exit loop when download is finished
+ if len(data_block) == 0:
+ break
+
+ # Open destination file just in time
+ if ctx.stream is None:
+ try:
+ ctx.stream, ctx.tmpfilename = sanitize_open(
+ ctx.tmpfilename, ctx.open_mode)
+ assert ctx.stream is not None
+ ctx.filename = self.undo_temp_name(ctx.tmpfilename)
+ self.report_destination(ctx.filename)
+ except (OSError, IOError) as err:
+ self.report_error('unable to open for writing: %s' % str(err))
+ return False
+
+ if self.params.get('xattr_set_filesize', False) and data_len is not None:
+ try:
+ write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
+ except (XAttrUnavailableError, XAttrMetadataError) as err:
+ self.report_error('unable to set filesize xattr: %s' % str(err))
+
+ try:
+ ctx.stream.write(data_block)
+ except (IOError, OSError) as err:
+ self.to_stderr('\n')
+ self.report_error('unable to write data: %s' % str(err))
+ return False
+
+ # Apply rate limit
+ self.slow_down(start, now, byte_counter - ctx.resume_len)
+
+ # end measuring of one loop run
+ now = time.time()
+ after = now
+
+ # Adjust block size
+ if not self.params.get('noresizebuffer', False):
+ block_size = self.best_block_size(after - before, len(data_block))
+
+ before = after
+
+ # Progress message
+ speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
+ if ctx.data_len is None:
+ eta = None
+ else:
+ eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+
+ self._hook_progress({
+ 'status': 'downloading',
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': ctx.data_len,
+ 'tmpfilename': ctx.tmpfilename,
+ 'filename': ctx.filename,
+ 'eta': eta,
+ 'speed': speed,
+ 'elapsed': now - ctx.start_time,
+ })
+
+ if data_len is not None and byte_counter == data_len:
+ break
+
+ if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
+ ctx.resume_len = byte_counter
+ # ctx.block_size = block_size
+ raise NextFragment()
+
+ if ctx.stream is None:
+ self.to_stderr('\n')
+ self.report_error('Did not get any data blocks')
+ return False
+ if ctx.tmpfilename != '-':
+ ctx.stream.close()
+
+ if data_len is not None and byte_counter != data_len:
+ err = ContentTooShortError(byte_counter, int(data_len))
+ if count <= retries:
+ retry(err)
+ raise err
+
+ self.try_rename(ctx.tmpfilename, ctx.filename)
+
+ # Update file modification time
+ if self.params.get('updatetime', True):
+ info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
+
+ self._hook_progress({
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': byte_counter,
+ 'filename': ctx.filename,
+ 'status': 'finished',
+ 'elapsed': time.time() - ctx.start_time,
+ })
+
+ return True
+
+ while count <= retries:
+ try:
+ establish_connection()
+ return download()
+ except RetryDownload as e:
+ count += 1
+ if count <= retries:
+ self.report_retry(e.source_error, count, retries)
+ continue
+ except NextFragment:
+ continue
+ except SucceedDownload:
+ return True
+
+ self.report_error('giving up after %s retries' % retries)
+ return False
diff --git a/hypervideo_dl/downloader/ism.py b/hypervideo_dl/downloader/ism.py
new file mode 100644
index 0000000..1ca666b
--- /dev/null
+++ b/hypervideo_dl/downloader/ism.py
@@ -0,0 +1,259 @@
+from __future__ import unicode_literals
+
+import time
+import binascii
+import io
+
+from .fragment import FragmentFD
+from ..compat import (
+ compat_Struct,
+ compat_urllib_error,
+)
+
+
+u8 = compat_Struct('>B')
+u88 = compat_Struct('>Bx')
+u16 = compat_Struct('>H')
+u1616 = compat_Struct('>Hxx')
+u32 = compat_Struct('>I')
+u64 = compat_Struct('>Q')
+
+s88 = compat_Struct('>bx')
+s16 = compat_Struct('>h')
+s1616 = compat_Struct('>hxx')
+s32 = compat_Struct('>i')
+
+unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
+
+TRACK_ENABLED = 0x1
+TRACK_IN_MOVIE = 0x2
+TRACK_IN_PREVIEW = 0x4
+
+SELF_CONTAINED = 0x1
+
+
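+# ISO BMFF primitives: a box is a 32-bit size (header included), a 4-byte
+# type and the payload; a "full" box additionally carries a version byte
+# and 24-bit flags at the start of its payload.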
+def box(box_type, payload):
+ return u32.pack(8 + len(payload)) + box_type + payload
+
+
+def full_box(box_type, version, flags, payload):
+ return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)
+
+
+def write_piff_header(stream, params):
+ track_id = params['track_id']
+ fourcc = params['fourcc']
+ duration = params['duration']
+ timescale = params.get('timescale', 10000000)
+ language = params.get('language', 'und')
+ height = params.get('height', 0)
+ width = params.get('width', 0)
+ is_audio = width == 0 and height == 0
+ creation_time = modification_time = int(time.time())
+
+ ftyp_payload = b'isml' # major brand
+ ftyp_payload += u32.pack(1) # minor version
+ ftyp_payload += b'piff' + b'iso2' # compatible brands
+ stream.write(box(b'ftyp', ftyp_payload)) # File Type Box
+
+ mvhd_payload = u64.pack(creation_time)
+ mvhd_payload += u64.pack(modification_time)
+ mvhd_payload += u32.pack(timescale)
+ mvhd_payload += u64.pack(duration)
+ mvhd_payload += s1616.pack(1) # rate
+ mvhd_payload += s88.pack(1) # volume
+ mvhd_payload += u16.pack(0) # reserved
+ mvhd_payload += u32.pack(0) * 2 # reserved
+ mvhd_payload += unity_matrix
+ mvhd_payload += u32.pack(0) * 6 # pre defined
+ mvhd_payload += u32.pack(0xffffffff) # next track id
+ moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box
+
+ tkhd_payload = u64.pack(creation_time)
+ tkhd_payload += u64.pack(modification_time)
+ tkhd_payload += u32.pack(track_id) # track id
+ tkhd_payload += u32.pack(0) # reserved
+ tkhd_payload += u64.pack(duration)
+ tkhd_payload += u32.pack(0) * 2 # reserved
+ tkhd_payload += s16.pack(0) # layer
+ tkhd_payload += s16.pack(0) # alternate group
+ tkhd_payload += s88.pack(1 if is_audio else 0) # volume
+ tkhd_payload += u16.pack(0) # reserved
+ tkhd_payload += unity_matrix
+ tkhd_payload += u1616.pack(width)
+ tkhd_payload += u1616.pack(height)
+ trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box
+
+ mdhd_payload = u64.pack(creation_time)
+ mdhd_payload += u64.pack(modification_time)
+ mdhd_payload += u32.pack(timescale)
+ mdhd_payload += u64.pack(duration)
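+ # mdhd packs the ISO 639-2/T language code as three 5-bit values
+ # (each letter's ASCII code minus 0x60)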
+ mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
+ mdhd_payload += u16.pack(0) # pre defined
+ mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
+
+ hdlr_payload = u32.pack(0) # pre defined
+ hdlr_payload += b'soun' if is_audio else b'vide' # handler type
+ hdlr_payload += u32.pack(0) * 3 # reserved
+ hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name
+ mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
+
+ if is_audio:
+ smhd_payload = s88.pack(0) # balance
+ smhd_payload += u16.pack(0) # reserved
+ media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
+ else:
+ vmhd_payload = u16.pack(0) # graphics mode
+ vmhd_payload += u16.pack(0) * 3 # opcolor
+ media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
+ minf_payload = media_header_box
+
+ dref_payload = u32.pack(1) # entry count
+ dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box
+ dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box
+ minf_payload += box(b'dinf', dinf_payload) # Data Information Box
+
+ stsd_payload = u32.pack(1) # entry count
+
+ sample_entry_payload = u8.pack(0) * 6 # reserved
+ sample_entry_payload += u16.pack(1) # data reference index
+ if is_audio:
+ sample_entry_payload += u32.pack(0) * 2 # reserved
+ sample_entry_payload += u16.pack(params.get('channels', 2))
+ sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
+ sample_entry_payload += u16.pack(0) # pre defined
+ sample_entry_payload += u16.pack(0) # reserved
+ sample_entry_payload += u1616.pack(params['sampling_rate'])
+
+ if fourcc == 'AACL':
+ sample_entry_box = box(b'mp4a', sample_entry_payload)
+ else:
+ sample_entry_payload += u16.pack(0) # pre defined
+ sample_entry_payload += u16.pack(0) # reserved
+ sample_entry_payload += u32.pack(0) * 3 # pre defined
+ sample_entry_payload += u16.pack(width)
+ sample_entry_payload += u16.pack(height)
+ sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi
+ sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi
+ sample_entry_payload += u32.pack(0) # reserved
+ sample_entry_payload += u16.pack(1) # frame count
+ sample_entry_payload += u8.pack(0) * 32 # compressor name
+ sample_entry_payload += u16.pack(0x18) # depth
+ sample_entry_payload += s16.pack(-1) # pre defined
+
+ codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
+ if fourcc in ('H264', 'AVC1'):
+ sps, pps = codec_private_data.split(u32.pack(1))[1:]
+ avcc_payload = u8.pack(1) # configuration version
+ avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
+ avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one
+ avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
+ avcc_payload += u16.pack(len(sps))
+ avcc_payload += sps
+ avcc_payload += u8.pack(1) # number of pps
+ avcc_payload += u16.pack(len(pps))
+ avcc_payload += pps
+ sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
+ sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
+ stsd_payload += sample_entry_box
+
+ stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
+
+ stts_payload = u32.pack(0) # entry count
+ stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box
+
+ stsc_payload = u32.pack(0) # entry count
+ stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box
+
+ stco_payload = u32.pack(0) # entry count
+ stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box
+
+ minf_payload += box(b'stbl', stbl_payload) # Sample Table Box
+
+ mdia_payload += box(b'minf', minf_payload) # Media Information Box
+
+ trak_payload += box(b'mdia', mdia_payload) # Media Box
+
+ moov_payload += box(b'trak', trak_payload) # Track Box
+
+ mehd_payload = u64.pack(duration)
+ mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box
+
+ trex_payload = u32.pack(track_id) # track id
+ trex_payload += u32.pack(1) # default sample description index
+ trex_payload += u32.pack(0) # default sample duration
+ trex_payload += u32.pack(0) # default sample size
+ trex_payload += u32.pack(0) # default sample flags
+ mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box
+
+ moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box
+ stream.write(box(b'moov', moov_payload)) # Movie Box
+
+
+def extract_box_data(data, box_sequence):
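+ """Return the payload of the box reached by following box_sequence,
+ e.g. extract_box_data(data, [b'moof', b'traf', b'tfhd']) returns the
+ raw payload of the first tfhd inside the first traf of the first moof."""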
+ data_reader = io.BytesIO(data)
+ while True:
+ box_size = u32.unpack(data_reader.read(4))[0]
+ box_type = data_reader.read(4)
+ if box_type == box_sequence[0]:
+ box_data = data_reader.read(box_size - 8)
+ if len(box_sequence) == 1:
+ return box_data
+ return extract_box_data(box_data, box_sequence[1:])
+ data_reader.seek(box_size - 8, 1)
+
+
+class IsmFD(FragmentFD):
+ """
+ Download segments in an ISM manifest
+ """
+
+ FD_NAME = 'ism'
+
+ def real_download(self, filename, info_dict):
+ segments = info_dict['fragments'][:1] if self.params.get(
+ 'test', False) else info_dict['fragments']
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': len(segments),
+ }
+
+ self._prepare_and_start_frag_download(ctx)
+
+ fragment_retries = self.params.get('fragment_retries', 0)
+ skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+ track_written = False
+ frag_index = 0
+ for i, segment in enumerate(segments):
+ frag_index += 1
+ if frag_index <= ctx['fragment_index']:
+ continue
+ count = 0
+ while count <= fragment_retries:
+ try:
+ success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
+ if not success:
+ return False
+ if not track_written:
+ tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
+ info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
+ write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
+ track_written = True
+ self._append_fragment(ctx, frag_content)
+ break
+ except compat_urllib_error.HTTPError as err:
+ count += 1
+ if count <= fragment_retries:
+ self.report_retry_fragment(err, frag_index, count, fragment_retries)
+ if count > fragment_retries:
+ if skip_unavailable_fragments:
+ self.report_skip_fragment(frag_index)
+ continue
+ self.report_error('giving up after %s fragment retries' % fragment_retries)
+ return False
+
+ self._finish_frag_download(ctx)
+
+ return True
diff --git a/hypervideo_dl/downloader/rtmp.py b/hypervideo_dl/downloader/rtmp.py
new file mode 100644
index 0000000..fbb7f51
--- /dev/null
+++ b/hypervideo_dl/downloader/rtmp.py
@@ -0,0 +1,214 @@
+from __future__ import unicode_literals
+
+import os
+import re
+import subprocess
+import time
+
+from .common import FileDownloader
+from ..compat import compat_str
+from ..utils import (
+ check_executable,
+ encodeFilename,
+ encodeArgument,
+ get_exe_version,
+)
+
+
+def rtmpdump_version():
+ return get_exe_version(
+ 'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)')
+
+
+class RtmpFD(FileDownloader):
+ def real_download(self, filename, info_dict):
+ def run_rtmpdump(args):
+ start = time.time()
+ resume_percent = None
+ resume_downloaded_data_len = None
+ proc = subprocess.Popen(args, stderr=subprocess.PIPE)
+ cursor_in_new_line = True
+ proc_stderr_closed = False
+ try:
+ while not proc_stderr_closed:
+ # read line from stderr
+ line = ''
+ while True:
+ char = proc.stderr.read(1)
+ if not char:
+ proc_stderr_closed = True
+ break
+ if char in [b'\r', b'\n']:
+ break
+ line += char.decode('ascii', 'replace')
+ if not line:
+ # proc_stderr_closed is True
+ continue
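+ # a sample progress line (inferred from the regex below):
+ # '3723.510 kB / 35.00 sec (73.4%)'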
+ mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
+ if mobj:
+ downloaded_data_len = int(float(mobj.group(1)) * 1024)
+ percent = float(mobj.group(2))
+ if not resume_percent:
+ resume_percent = percent
+ resume_downloaded_data_len = downloaded_data_len
+ time_now = time.time()
+ eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
+ speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
+ data_len = None
+ if percent > 0:
+ data_len = int(downloaded_data_len * 100 / percent)
+ self._hook_progress({
+ 'status': 'downloading',
+ 'downloaded_bytes': downloaded_data_len,
+ 'total_bytes_estimate': data_len,
+ 'tmpfilename': tmpfilename,
+ 'filename': filename,
+ 'eta': eta,
+ 'elapsed': time_now - start,
+ 'speed': speed,
+ })
+ cursor_in_new_line = False
+ else:
+ # no percent for live streams
+ mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
+ if mobj:
+ downloaded_data_len = int(float(mobj.group(1)) * 1024)
+ time_now = time.time()
+ speed = self.calc_speed(start, time_now, downloaded_data_len)
+ self._hook_progress({
+ 'downloaded_bytes': downloaded_data_len,
+ 'tmpfilename': tmpfilename,
+ 'filename': filename,
+ 'status': 'downloading',
+ 'elapsed': time_now - start,
+ 'speed': speed,
+ })
+ cursor_in_new_line = False
+ elif self.params.get('verbose', False):
+ if not cursor_in_new_line:
+ self.to_screen('')
+ cursor_in_new_line = True
+ self.to_screen('[rtmpdump] ' + line)
+ finally:
+ proc.wait()
+ if not cursor_in_new_line:
+ self.to_screen('')
+ return proc.returncode
+
+ url = info_dict['url']
+ player_url = info_dict.get('player_url')
+ page_url = info_dict.get('page_url')
+ app = info_dict.get('app')
+ play_path = info_dict.get('play_path')
+ tc_url = info_dict.get('tc_url')
+ flash_version = info_dict.get('flash_version')
+ live = info_dict.get('rtmp_live', False)
+ conn = info_dict.get('rtmp_conn')
+ protocol = info_dict.get('rtmp_protocol')
+ real_time = info_dict.get('rtmp_real_time', False)
+ no_resume = info_dict.get('no_resume', False)
+ continue_dl = self.params.get('continuedl', True)
+
+ self.report_destination(filename)
+ tmpfilename = self.temp_name(filename)
+ test = self.params.get('test', False)
+
+ # Check for rtmpdump first
+ if not check_executable('rtmpdump', ['-h']):
+ self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
+ return False
+
+ # Download using rtmpdump. rtmpdump returns exit code 2 when
+ # the connection was interrupted and resuming appears to be
+ # possible. This is part of rtmpdump's normal usage, AFAIK.
+ basic_args = [
+ 'rtmpdump', '--verbose', '-r', url,
+ '-o', tmpfilename]
+ if player_url is not None:
+ basic_args += ['--swfVfy', player_url]
+ if page_url is not None:
+ basic_args += ['--pageUrl', page_url]
+ if app is not None:
+ basic_args += ['--app', app]
+ if play_path is not None:
+ basic_args += ['--playpath', play_path]
+ if tc_url is not None:
+ basic_args += ['--tcUrl', tc_url]
+ if test:
+ basic_args += ['--stop', '1']
+ if flash_version is not None:
+ basic_args += ['--flashVer', flash_version]
+ if live:
+ basic_args += ['--live']
+ if isinstance(conn, list):
+ for entry in conn:
+ basic_args += ['--conn', entry]
+ elif isinstance(conn, compat_str):
+ basic_args += ['--conn', conn]
+ if protocol is not None:
+ basic_args += ['--protocol', protocol]
+ if real_time:
+ basic_args += ['--realtime']
+
+ args = basic_args
+ if not no_resume and continue_dl and not live:
+ args += ['--resume']
+ if not live and continue_dl:
+ args += ['--skip', '1']
+
+ args = [encodeArgument(a) for a in args]
+
+ self._debug_cmd(args, exe='rtmpdump')
+
+ RD_SUCCESS = 0
+ RD_FAILED = 1
+ RD_INCOMPLETE = 2
+ RD_NO_CONNECT = 3
+
+ started = time.time()
+
+ try:
+ retval = run_rtmpdump(args)
+ except KeyboardInterrupt:
+ if not info_dict.get('is_live'):
+ raise
+ retval = RD_SUCCESS
+ self.to_screen('\n[rtmpdump] Interrupted by user')
+
+ if retval == RD_NO_CONNECT:
+ self.report_error('[rtmpdump] Could not connect to RTMP server.')
+ return False
+
+ while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
+ prevsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
+ time.sleep(5.0) # This seems to be needed
+ args = basic_args + ['--resume']
+ if retval == RD_FAILED:
+ args += ['--skip', '1']
+ args = [encodeArgument(a) for a in args]
+ retval = run_rtmpdump(args)
+ cursize = os.path.getsize(encodeFilename(tmpfilename))
+ if prevsize == cursize and retval == RD_FAILED:
+ break
+ # Some RTMP streams seem to abort after ~99.8%. Don't complain for those
+ if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
+ self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
+ retval = RD_SUCCESS
+ break
+ if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
+ fsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
+ self.try_rename(tmpfilename, filename)
+ self._hook_progress({
+ 'downloaded_bytes': fsize,
+ 'total_bytes': fsize,
+ 'filename': filename,
+ 'status': 'finished',
+ 'elapsed': time.time() - started,
+ })
+ return True
+ else:
+ self.to_stderr('\n')
+ self.report_error('rtmpdump exited with code %d' % retval)
+ return False
diff --git a/hypervideo_dl/downloader/rtsp.py b/hypervideo_dl/downloader/rtsp.py
new file mode 100644
index 0000000..939358b
--- /dev/null
+++ b/hypervideo_dl/downloader/rtsp.py
@@ -0,0 +1,47 @@
+from __future__ import unicode_literals
+
+import os
+import subprocess
+
+from .common import FileDownloader
+from ..utils import (
+ check_executable,
+ encodeFilename,
+)
+
+
+class RtspFD(FileDownloader):
+ def real_download(self, filename, info_dict):
+ url = info_dict['url']
+ self.report_destination(filename)
+ tmpfilename = self.temp_name(filename)
+
+ if check_executable('mplayer', ['-h']):
+ args = [
+ 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
+ '-dumpstream', '-dumpfile', tmpfilename, url]
+ elif check_executable('mpv', ['-h']):
+ args = [
+ 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
+ else:
+ self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install one of them.')
+ return False
+
+ self._debug_cmd(args)
+
+ retval = subprocess.call(args)
+ if retval == 0:
+ fsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
+ self.try_rename(tmpfilename, filename)
+ self._hook_progress({
+ 'downloaded_bytes': fsize,
+ 'total_bytes': fsize,
+ 'filename': filename,
+ 'status': 'finished',
+ })
+ return True
+ else:
+ self.to_stderr('\n')
+ self.report_error('%s exited with code %d' % (args[0], retval))
+ return False
diff --git a/hypervideo_dl/extractor/__init__.py b/hypervideo_dl/extractor/__init__.py
new file mode 100644
index 0000000..18d8dbc
--- /dev/null
+++ b/hypervideo_dl/extractor/__init__.py
@@ -0,0 +1,46 @@
+from __future__ import unicode_literals
+
+try:
+ from .lazy_extractors import *
+ from .lazy_extractors import _ALL_CLASSES
+ _LAZY_LOADER = True
+except ImportError:
+ _LAZY_LOADER = False
+ from .extractors import *
+
+ _ALL_CLASSES = [
+ klass
+ for name, klass in globals().items()
+ if name.endswith('IE') and name != 'GenericIE'
+ ]
+ _ALL_CLASSES.append(GenericIE)
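+ # GenericIE must stay last: extractors are probed in list order, so
+ # the catch-all extractor should only see URLs that no site-specific
+ # extractor has claimed.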
+
+
+def gen_extractor_classes():
+ """ Return a list of supported extractors.
+ The order does matter; the first extractor matched is the one handling the URL.
+ """
+ return _ALL_CLASSES
+
+
+def gen_extractors():
+ """ Return a list of an instance of every supported extractor.
+ The order does matter; the first extractor matched is the one handling the URL.
+ """
+ return [klass() for klass in gen_extractor_classes()]
+
+
+def list_extractors(age_limit):
+ """
+ Return a list of extractors that are suitable for the given age,
+ sorted by extractor ID.
+ """
+
+ return sorted(
+ filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
+ key=lambda ie: ie.IE_NAME.lower())
+
+
+def get_info_extractor(ie_name):
+ """Returns the info extractor class with the given ie_name"""
+ return globals()[ie_name + 'IE']
diff --git a/hypervideo_dl/extractor/abc.py b/hypervideo_dl/extractor/abc.py
new file mode 100644
index 0000000..6637f4f
--- /dev/null
+++ b/hypervideo_dl/extractor/abc.py
@@ -0,0 +1,193 @@
+from __future__ import unicode_literals
+
+import hashlib
+import hmac
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ js_to_json,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+ unescapeHTML,
+ update_url_query,
+)
+
+
+class ABCIE(InfoExtractor):
+ IE_NAME = 'abc.net.au'
+ _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
+ 'md5': 'cb3dd03b18455a661071ee1e28344d9f',
+ 'info_dict': {
+ 'id': '5868334',
+ 'ext': 'mp4',
+ 'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
+ 'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
+ },
+ 'skip': 'this video has expired',
+ }, {
+ 'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
+ 'md5': 'db2a5369238b51f9811ad815b69dc086',
+ 'info_dict': {
+ 'id': 'NvqvPeNZsHU',
+ 'ext': 'mp4',
+ 'upload_date': '20150816',
+ 'uploader': 'ABC News (Australia)',
+ 'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef',
+ 'uploader_id': 'NewsOnABC',
+ 'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
+ },
+ 'add_ie': ['Youtube'],
+ 'skip': 'Not accessible from Travis CI server',
+ }, {
+ 'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
+ 'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
+ 'info_dict': {
+ 'id': '6880080',
+ 'ext': 'mp3',
+ 'title': 'NAB lifts interest rates, following Westpac and CBA',
+ 'description': 'md5:f13d8edc81e462fce4a0437c7dc04728',
+ },
+ }, {
+ 'url': 'http://www.abc.net.au/news/2015-10-19/6866214',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ mobj = re.search(
+ r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
+ webpage)
+ if mobj is None:
+ expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
+ if expired:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
+ raise ExtractorError('Unable to extract video URLs')
+
+ urls_info = self._parse_json(
+ mobj.group('json_data'), video_id, transform_source=js_to_json)
+
+ if not isinstance(urls_info, list):
+ urls_info = [urls_info]
+
+ if mobj.group('type') == 'YouTube':
+ return self.playlist_result([
+ self.url_result(url_info['url']) for url_info in urls_info])
+
+ formats = [{
+ 'url': url_info['url'],
+ 'vcodec': url_info.get('codec') if mobj.group('type') == 'Video' else 'none',
+ 'width': int_or_none(url_info.get('width')),
+ 'height': int_or_none(url_info.get('height')),
+ 'tbr': int_or_none(url_info.get('bitrate')),
+ 'filesize': int_or_none(url_info.get('filesize')),
+ } for url_info in urls_info]
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
+
+
+class ABCIViewIE(InfoExtractor):
+ IE_NAME = 'abc.net.au:iview'
+ _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
+ _GEO_COUNTRIES = ['AU']
+
+ # ABC iview programs are normally available for 14 days only.
+ _TESTS = [{
+ 'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
+ 'md5': '67715ce3c78426b11ba167d875ac6abf',
+ 'info_dict': {
+ 'id': 'LE1927H001S00',
+ 'ext': 'mp4',
+ 'title': "Series 11 Ep 1",
+ 'series': "Gruen",
+ 'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
+ 'upload_date': '20190925',
+ 'uploader_id': 'abc1',
+ 'timestamp': 1569445289,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_params = self._download_json(
+ 'https://iview.abc.net.au/api/programs/' + video_id, video_id)
+ title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
+ stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
+
+ house_number = video_params.get('episodeHouseNumber') or video_id
+ path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
+ int(time.time()), house_number)
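+ # The unsigned path is HMAC-SHA256 signed with a static key and the
+ # hex digest appended as 'sig'; the response body is the 'hdnea'
+ # token used to authorise the HLS URLs below.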
+ sig = hmac.new(
+ b'android.content.res.Resources',
+ path.encode('utf-8'), hashlib.sha256).hexdigest()
+ token = self._download_webpage(
+ 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
+
+ def tokenize_url(url, token):
+ return update_url_query(url, {
+ 'hdnea': token,
+ })
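+ # e.g. tokenize_url('https://host/master.m3u8', 'tok') ->
+ # 'https://host/master.m3u8?hdnea=tok'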
+
+ formats = []  # ensure defined even if no HLS stream URL is found
+ for sd in ('720', 'sd', 'sd-low'):
+ sd_url = try_get(
+ stream, lambda x: x['streams']['hls'][sd], compat_str)
+ if not sd_url:
+ continue
+ formats = self._extract_m3u8_formats(
+ tokenize_url(sd_url, token), video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+ if formats:
+ break
+ self._sort_formats(formats)
+
+ subtitles = {}
+ src_vtt = stream.get('captions', {}).get('src-vtt')
+ if src_vtt:
+ subtitles['en'] = [{
+ 'url': src_vtt,
+ 'ext': 'vtt',
+ }]
+
+ is_live = video_params.get('livestream') == '1'
+ if is_live:
+ title = self._live_title(title)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_params.get('description'),
+ 'thumbnail': video_params.get('thumbnail'),
+ 'duration': int_or_none(video_params.get('eventDuration')),
+ 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
+ 'series': unescapeHTML(video_params.get('seriesTitle')),
+ 'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
+ 'season_number': int_or_none(self._search_regex(
+ r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
+ 'episode_number': int_or_none(self._search_regex(
+ r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
+ 'episode_id': house_number,
+ 'uploader_id': video_params.get('channel'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ }
diff --git a/hypervideo_dl/extractor/abcnews.py b/hypervideo_dl/extractor/abcnews.py
new file mode 100644
index 0000000..908c833
--- /dev/null
+++ b/hypervideo_dl/extractor/abcnews.py
@@ -0,0 +1,158 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .amp import AMPIE
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+ try_get,
+)
+
+
+class AbcNewsVideoIE(AMPIE):
+ IE_NAME = 'abcnews:video'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ abcnews\.go\.com/
+ (?:
+ (?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
+ video/(?:embed|itemfeed)\?.*?\bid=
+ )|
+ fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
+ )
+ (?P<id>\d+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
+ 'info_dict': {
+ 'id': '20411932',
+ 'ext': 'mp4',
+ 'display_id': 'week-exclusive-irans-foreign-minister-zarif',
+ 'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
+ 'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
+ 'duration': 180,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1380454200,
+ 'upload_date': '20130929',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://abcnews.go.com/video/embed?id=46979033',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('display_id')
+ video_id = mobj.group('id')
+ info_dict = self._extract_feed_info(
+ 'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
+ info_dict.update({
+ 'id': video_id,
+ 'display_id': display_id,
+ })
+ return info_dict
+
+
+class AbcNewsIE(InfoExtractor):
+ IE_NAME = 'abcnews'
+ _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
+
+ _TESTS = [{
+ # Youtube Embeds
+ 'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
+ 'info_dict': {
+ 'id': '51286501',
+ 'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
+ 'description': 'Billingsley went from a child actor to Hollywood power player.',
+ },
+ 'playlist_count': 5,
+ }, {
+ 'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
+ 'info_dict': {
+ 'id': '38897857',
+ 'ext': 'mp4',
+ 'title': 'Justin Timberlake Drops Hints For Secret Single',
+ 'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
+ 'upload_date': '20160505',
+ 'timestamp': 1462442280,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ # The embedded YouTube video is blocked due to copyright issues
+ 'playlist_items': '1',
+ },
+ 'add_ie': ['AbcNewsVideo'],
+ }, {
+ 'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
+ 'only_matching': True,
+ }, {
+ # inline.type == 'video'
+ 'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ story_id = self._match_id(url)
+ webpage = self._download_webpage(url, story_id)
+ story = self._parse_json(self._search_regex(
+ r"window\['__abcnews__'\]\s*=\s*({.+?});",
+ webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
+ article_contents = story.get('articleContents') or {}
+
+ def entries():
+ featured_video = story.get('featuredVideo') or {}
+ feed = try_get(featured_video, lambda x: x['video']['feed'])
+ if feed:
+ yield {
+ '_type': 'url',
+ 'id': featured_video.get('id'),
+ 'title': featured_video.get('name'),
+ 'url': feed,
+ 'thumbnail': featured_video.get('images'),
+ 'description': featured_video.get('description'),
+ 'timestamp': parse_iso8601(featured_video.get('uploadDate')),
+ 'duration': parse_duration(featured_video.get('duration')),
+ 'ie_key': AbcNewsVideoIE.ie_key(),
+ }
+
+ for inline in (article_contents.get('inlines') or []):
+ inline_type = inline.get('type')
+ if inline_type == 'iframe':
+ iframe_url = try_get(inline, lambda x: x['attrs']['src'])
+ if iframe_url:
+ yield self.url_result(iframe_url)
+ elif inline_type == 'video':
+ video_id = inline.get('id')
+ if video_id:
+ yield {
+ '_type': 'url',
+ 'id': video_id,
+ 'url': 'http://abcnews.go.com/video/embed?id=' + video_id,
+ 'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
+ 'description': inline.get('description'),
+ 'duration': parse_duration(inline.get('duration')),
+ 'ie_key': AbcNewsVideoIE.ie_key(),
+ }
+
+ return self.playlist_result(
+ entries(), story_id, article_contents.get('headline'),
+ article_contents.get('subHead'))
diff --git a/hypervideo_dl/extractor/abcotvs.py b/hypervideo_dl/extractor/abcotvs.py
new file mode 100644
index 0000000..0bc69a6
--- /dev/null
+++ b/hypervideo_dl/extractor/abcotvs.py
@@ -0,0 +1,137 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ dict_get,
+ int_or_none,
+ try_get,
+)
+
+
+class ABCOTVSIE(InfoExtractor):
+ IE_NAME = 'abcotvs'
+ IE_DESC = 'ABC Owned Television Stations'
+ _VALID_URL = r'https?://(?P<site>abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:(?:/[^/]+)*/(?P<display_id>[^/]+))?/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
+ 'info_dict': {
+ 'id': '472548',
+ 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
+ 'ext': 'mp4',
+ 'title': 'East Bay museum celebrates synthesized music',
+ 'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1421118520,
+ 'upload_date': '20150113',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://abc7news.com/472581',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://6abc.com/man-75-killed-after-being-struck-by-vehicle-in-chester/5725182/',
+ 'only_matching': True,
+ },
+ ]
+ _SITE_MAP = {
+ '6abc': 'wpvi',
+ 'abc11': 'wtvd',
+ 'abc13': 'ktrk',
+ 'abc30': 'kfsn',
+ 'abc7': 'kabc',
+ 'abc7chicago': 'wls',
+ 'abc7news': 'kgo',
+ 'abc7ny': 'wabc',
+ }
+
+ def _real_extract(self, url):
+ site, display_id, video_id = re.match(self._VALID_URL, url).groups()
+ display_id = display_id or video_id
+ station = self._SITE_MAP[site]
+
+ data = self._download_json(
+ 'https://api.abcotvs.com/v2/content', display_id, query={
+ 'id': video_id,
+ 'key': 'otv.web.%s.story' % station,
+ 'station': station,
+ })['data']
+ video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
+ video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
+ title = video.get('title') or video['linkText']
+
+ formats = []
+ m3u8_url = video.get('m3u8')
+ if m3u8_url:
+ formats = self._extract_m3u8_formats(
+ video['m3u8'].split('?')[0], display_id, 'mp4', m3u8_id='hls', fatal=False)
+ mp4_url = video.get('mp4')
+ if mp4_url:
+ formats.append({
+ 'abr': 128,
+ 'format_id': 'https',
+ 'height': 360,
+ 'url': mp4_url,
+ 'width': 640,
+ })
+ self._sort_formats(formats)
+
+ image = video.get('image') or {}
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': dict_get(video, ('description', 'caption'), try_get(video, lambda x: x['meta']['description'])),
+ 'thumbnail': dict_get(image, ('source', 'dynamicSource')),
+ 'timestamp': int_or_none(video.get('date')),
+ 'duration': int_or_none(video.get('length')),
+ 'formats': formats,
+ }
+
+
+class ABCOTVSClipsIE(InfoExtractor):
+ IE_NAME = 'abcotvs:clips'
+ _VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://clips.abcotvs.com/kabc/video/214814',
+ 'info_dict': {
+ 'id': '214814',
+ 'ext': 'mp4',
+ 'title': 'SpaceX launch pad explosion destroys rocket, satellite',
+ 'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b',
+ 'upload_date': '20160901',
+ 'timestamp': 1472756695,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json('https://clips.abcotvs.com/vogo/video/getByIds?ids=' + video_id, video_id)['results'][0]
+ title = video_data['title']
+ formats = self._extract_m3u8_formats(
+ video_data['videoURL'].split('?')[0], video_id, 'mp4')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnailURL'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'timestamp': int_or_none(video_data.get('pubDate')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/academicearth.py b/hypervideo_dl/extractor/academicearth.py
new file mode 100644
index 0000000..3409550
--- /dev/null
+++ b/hypervideo_dl/extractor/academicearth.py
@@ -0,0 +1,41 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class AcademicEarthCourseIE(InfoExtractor):
+ _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
+ IE_NAME = 'AcademicEarth:Course'
+ _TEST = {
+ 'url': 'http://academicearth.org/playlists/laws-of-nature/',
+ 'info_dict': {
+ 'id': 'laws-of-nature',
+ 'title': 'Laws of Nature',
+ 'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
+ },
+ 'playlist_count': 3,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+ title = self._html_search_regex(
+ r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, 'title')
+ description = self._html_search_regex(
+ r'<p class="excerpt"[^>]*?>(.*?)</p>',
+ webpage, 'description', fatal=False)
+ urls = re.findall(
+ r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
+ webpage)
+ entries = [self.url_result(u) for u in urls]
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': title,
+ 'description': description,
+ 'entries': entries,
+ }
diff --git a/hypervideo_dl/extractor/acast.py b/hypervideo_dl/extractor/acast.py
new file mode 100644
index 0000000..b9355a2
--- /dev/null
+++ b/hypervideo_dl/extractor/acast.py
@@ -0,0 +1,126 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ clean_podcast_url,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class ACastBaseIE(InfoExtractor):
+ def _extract_episode(self, episode, show_info):
+ title = episode['title']
+ info = {
+ 'id': episode['id'],
+ 'display_id': episode.get('episodeUrl'),
+ 'url': clean_podcast_url(episode['url']),
+ 'title': title,
+ 'description': clean_html(episode.get('description') or episode.get('summary')),
+ 'thumbnail': episode.get('image'),
+ 'timestamp': parse_iso8601(episode.get('publishDate')),
+ 'duration': int_or_none(episode.get('duration')),
+ 'filesize': int_or_none(episode.get('contentLength')),
+ 'season_number': int_or_none(episode.get('season')),
+ 'episode': title,
+ 'episode_number': int_or_none(episode.get('episode')),
+ }
+ info.update(show_info)
+ return info
+
+ def _extract_show_info(self, show):
+ return {
+ 'creator': show.get('author'),
+ 'series': show.get('title'),
+ }
+
+ def _call_api(self, path, video_id, query=None):
+ return self._download_json(
+ 'https://feeder.acast.com/api/v1/shows/' + path, video_id, query=query)
+
+
+class ACastIE(ACastBaseIE):
+ IE_NAME = 'acast'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?:embed|www)\.)?acast\.com/|
+ play\.acast\.com/s/
+ )
+ (?P<channel>[^/]+)/(?P<id>[^/#?]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
+ 'md5': 'f5598f3ad1e4776fed12ec1407153e4b',
+ 'info_dict': {
+ 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
+ 'ext': 'mp3',
+ 'title': '2. Raggarmordet - Röster ur det förflutna',
+ 'description': 'md5:a992ae67f4d98f1c0141598f7bebbf67',
+ 'timestamp': 1477346700,
+ 'upload_date': '20161024',
+ 'duration': 2766,
+ 'creator': 'Anton Berg & Martin Johnson',
+ 'series': 'Spår',
+ 'episode': '2. Raggarmordet - Röster ur det förflutna',
+ }
+ }, {
+ 'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel, display_id = re.match(self._VALID_URL, url).groups()
+ episode = self._call_api(
+ '%s/episodes/%s' % (channel, display_id),
+ display_id, {'showInfo': 'true'})
+ return self._extract_episode(
+ episode, self._extract_show_info(episode.get('show') or {}))
+
+
+class ACastChannelIE(ACastBaseIE):
+ IE_NAME = 'acast:channel'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?acast\.com/|
+ play\.acast\.com/s/
+ )
+ (?P<id>[^/#?]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.acast.com/todayinfocus',
+ 'info_dict': {
+ 'id': '4efc5294-5385-4847-98bd-519799ce5786',
+ 'title': 'Today in Focus',
+ 'description': 'md5:c09ce28c91002ce4ffce71d6504abaae',
+ },
+ 'playlist_mincount': 200,
+ }, {
+ 'url': 'http://play.acast.com/s/ft-banking-weekly',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ show_slug = self._match_id(url)
+ show = self._call_api(show_slug, show_slug)
+ show_info = self._extract_show_info(show)
+ entries = []
+ for episode in (show.get('episodes') or []):
+ entries.append(self._extract_episode(episode, show_info))
+ return self.playlist_result(
+ entries, show.get('id'), show.get('title'), show.get('description'))
diff --git a/hypervideo_dl/extractor/adn.py b/hypervideo_dl/extractor/adn.py
new file mode 100644
index 0000000..a55ebbc
--- /dev/null
+++ b/hypervideo_dl/extractor/adn.py
@@ -0,0 +1,269 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import binascii
+import json
+import os
+import random
+
+from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt
+from ..compat import (
+ compat_HTTPError,
+ compat_b64decode,
+ compat_ord,
+)
+from ..utils import (
+ bytes_to_intlist,
+ bytes_to_long,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ intlist_to_bytes,
+ long_to_bytes,
+ pkcs1pad,
+ strip_or_none,
+ try_get,
+ unified_strdate,
+ urlencode_postdata,
+)
+
+
+class ADNIE(InfoExtractor):
+ IE_DESC = 'Anime Digital Network'
+ _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
+ 'md5': '0319c99885ff5547565cacb4f3f9348d',
+ 'info_dict': {
+ 'id': '7778',
+ 'ext': 'mp4',
+ 'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
+ 'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
+ 'series': 'Blue Exorcist - Kyôto Saga',
+ 'duration': 1467,
+ 'release_date': '20170106',
+ 'comment_count': int,
+ 'average_rating': float,
+ 'season_number': 2,
+ 'episode': 'Début des hostilités',
+ 'episode_number': 1,
+ }
+ }
+
+ _NETRC_MACHINE = 'animedigitalnetwork'
+ _BASE_URL = 'http://animedigitalnetwork.fr'
+ _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
+ _PLAYER_BASE_URL = _API_BASE_URL + 'player/'
+ _HEADERS = {}
+ _LOGIN_ERR_MESSAGE = 'Unable to log in'
+ _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
+ _POS_ALIGN_MAP = {
+ 'start': 1,
+ 'end': 3,
+ }
+ _LINE_ALIGN_MAP = {
+ 'middle': 8,
+ 'end': 4,
+ }
+
+ @staticmethod
+ def _ass_subtitles_timecode(seconds):
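+ # e.g. 3661.25 -> '1:01:01.25' (H:MM:SS.cc, centisecond precision)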
+ return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
+
+ def _get_subtitles(self, sub_url, video_id):
+ if not sub_url:
+ return None
+
+ enc_subtitles = self._download_webpage(
+ sub_url, video_id, 'Downloading subtitles location', fatal=False) or '{}'
+ subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
+ if subtitle_location:
+ enc_subtitles = self._download_webpage(
+ subtitle_location, video_id, 'Downloading subtitles data',
+ fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
+ if not enc_subtitles:
+ return None
+
+ # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
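+ # The first 24 base64 characters carry the AES-CBC IV and the rest the
+ # ciphertext; the key is the random self._K sent when requesting the
+ # links (see _real_extract) plus a fixed hex suffix.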
+ dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
+ bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
+ bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
+ bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
+ ))
+ subtitles_json = self._parse_json(
+ dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
+ None, fatal=False)
+ if not subtitles_json:
+ return None
+
+ subtitles = {}
+ for sub_lang, sub in subtitles_json.items():
+ ssa = '''[Script Info]
+ScriptType:V4.00
+[V4 Styles]
+Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,TertiaryColour,BackColour,Bold,Italic,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,AlphaLevel,Encoding
+Style: Default,Arial,18,16777215,16777215,16777215,0,-1,0,1,1,0,2,20,20,20,0,0
+[Events]
+Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
+ for current in sub:
+ start, end, text, line_align, position_align = (
+ float_or_none(current.get('startTime')),
+ float_or_none(current.get('endTime')),
+ current.get('text'), current.get('lineAlign'),
+ current.get('positionAlign'))
+ if start is None or end is None or text is None:
+ continue
+ alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
+ ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % (
+ self._ass_subtitles_timecode(start),
+ self._ass_subtitles_timecode(end),
+ '{\\a%d}' % alignment if alignment != 2 else '',
+ text.replace('\n', '\\N').replace('<i>', '{\\i1}').replace('</i>', '{\\i0}'))
+
+ if sub_lang == 'vostf':
+ sub_lang = 'fr'
+ subtitles.setdefault(sub_lang, []).extend([{
+ 'ext': 'json',
+ 'data': json.dumps(sub),
+ }, {
+ 'ext': 'ssa',
+ 'data': ssa,
+ }])
+ return subtitles
+
+ def _real_initialize(self):
+ username, password = self._get_login_info()
+ if not username:
+ return
+ try:
+ access_token = (self._download_json(
+ self._API_BASE_URL + 'authentication/login', None,
+ 'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
+ data=urlencode_postdata({
+ 'password': password,
+ 'rememberMe': False,
+ 'source': 'Web',
+ 'username': username,
+ })) or {}).get('accessToken')
+ if access_token:
+ self._HEADERS = {'authorization': 'Bearer ' + access_token}
+ except ExtractorError as e:
+ message = None
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ resp = self._parse_json(
+ e.cause.read().decode(), None, fatal=False) or {}
+ message = resp.get('message') or resp.get('code')
+ self.report_warning(message or self._LOGIN_ERR_MESSAGE)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
+ player = self._download_json(
+ video_base_url + 'configuration', video_id,
+ 'Downloading player config JSON metadata',
+ headers=self._HEADERS)['player']
+ options = player['options']
+
+ user = options['user']
+ if not user.get('hasAccess'):
+ self.raise_login_required()
+
+ token = self._download_json(
+ user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
+ video_id, 'Downloading access token', headers={
+ 'x-player-refresh-token': user['refreshToken']
+ }, data=b'')['token']
+
+ links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
+ self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
+ message = bytes_to_intlist(json.dumps({
+ 'k': self._K,
+ 't': token,
+ }))
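+ # The JSON payload {'k': random AES key half, 't': player token} is
+ # PKCS#1-padded, RSA-encrypted with the hardcoded public key above and
+ # sent base64-encoded in the X-Player-Token header; the same self._K is
+ # reused server-side to encrypt the subtitles (see _get_subtitles).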
+
+ # Sometimes authentication fails for no good reason, retry with
+ # a different random padding
+ links_data = None
+ for _ in range(3):
+ padded_message = intlist_to_bytes(pkcs1pad(message, 128))
+ n, e = self._RSA_KEY
+ encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
+ authorization = base64.b64encode(encrypted_message).decode()
+
+ try:
+ links_data = self._download_json(
+ links_url, video_id, 'Downloading links JSON metadata', headers={
+ 'X-Player-Token': authorization
+ }, query={
+ 'freeWithAds': 'true',
+ 'adaptive': 'false',
+ 'withMetadata': 'true',
+ 'source': 'Web'
+ })
+ break
+ except ExtractorError as e:
+ if not isinstance(e.cause, compat_HTTPError):
+ raise e
+
+ if e.cause.code == 401:
+ # This usually goes away with a different random pkcs1pad, so retry
+ continue
+
+ error = self._parse_json(e.cause.read(), video_id)
+ message = error.get('message')
+ if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
+ self.raise_geo_restricted(msg=message)
+ raise ExtractorError(message)
+ else:
+ raise ExtractorError('Giving up after 3 attempts')
+
+ links = links_data.get('links') or {}
+ metas = links_data.get('metadata') or {}
+ sub_url = (links.get('subtitles') or {}).get('all')
+ video_info = links_data.get('video') or {}
+ title = metas['title']
+
+ formats = []
+ for format_id, qualities in (links.get('streaming') or {}).items():
+ if not isinstance(qualities, dict):
+ continue
+ for quality, load_balancer_url in qualities.items():
+ load_balancer_data = self._download_json(
+ load_balancer_url, video_id,
+ 'Downloading %s %s JSON metadata' % (format_id, quality),
+ fatal=False) or {}
+ m3u8_url = load_balancer_data.get('location')
+ if not m3u8_url:
+ continue
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False)
+ if format_id == 'vf':
+ for f in m3u8_formats:
+ f['language'] = 'fr'
+ formats.extend(m3u8_formats)
+ self._sort_formats(formats)
+
+ video = (self._download_json(
+ self._API_BASE_URL + 'video/%s' % video_id, video_id,
+ 'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
+ show = video.get('show') or {}
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(metas.get('summary') or video.get('summary')),
+ 'thumbnail': video_info.get('image') or player.get('image'),
+ 'formats': formats,
+ 'subtitles': self.extract_subtitles(sub_url, video_id),
+ 'episode': metas.get('subtitle') or video.get('name'),
+ 'episode_number': int_or_none(video.get('shortNumber')),
+ 'series': show.get('title'),
+ 'season_number': int_or_none(video.get('season')),
+ 'duration': int_or_none(video_info.get('duration') or video.get('duration')),
+ 'release_date': unified_strdate(video.get('releaseDate')),
+ 'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
+ 'comment_count': int_or_none(video.get('commentsCount')),
+ }
diff --git a/hypervideo_dl/extractor/adobeconnect.py b/hypervideo_dl/extractor/adobeconnect.py
new file mode 100644
index 0000000..728549e
--- /dev/null
+++ b/hypervideo_dl/extractor/adobeconnect.py
@@ -0,0 +1,37 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+
+
+class AdobeConnectIE(InfoExtractor):
+ _VALID_URL = r'https?://\w+\.adobeconnect\.com/(?P<id>[\w-]+)'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+ qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
+ is_live = qs.get('isLive', ['false'])[0] == 'true'
+ formats = []
+ for con_string in qs['conStrings'][0].split(','):
+ formats.append({
+ 'format_id': con_string.split('://')[0],
+ 'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
+ 'ext': 'flv',
+ 'play_path': 'mp4:' + qs['streamName'][0],
+ 'rtmp_conn': 'S:' + qs['ticket'][0],
+ 'rtmp_live': is_live,
+ 'url': con_string,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'formats': formats,
+ 'is_live': is_live,
+ }
diff --git a/hypervideo_dl/extractor/adobepass.py b/hypervideo_dl/extractor/adobepass.py
new file mode 100644
index 0000000..38dca1b
--- /dev/null
+++ b/hypervideo_dl/extractor/adobepass.py
@@ -0,0 +1,1572 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import time
+import xml.etree.ElementTree as etree
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_kwargs,
+ compat_urlparse,
+)
+from ..utils import (
+ unescapeHTML,
+ urlencode_postdata,
+ unified_timestamp,
+ ExtractorError,
+ NO_DEFAULT,
+)
+
+
+MSO_INFO = {
+ 'DTV': {
+ 'name': 'DIRECTV',
+ 'username_field': 'username',
+ 'password_field': 'password',
+ },
+ 'ATT': {
+ 'name': 'AT&T U-verse',
+ 'username_field': 'userid',
+ 'password_field': 'password',
+ },
+ 'ATTOTT': {
+ 'name': 'DIRECTV NOW',
+ 'username_field': 'email',
+ 'password_field': 'loginpassword',
+ },
+ 'Rogers': {
+ 'name': 'Rogers',
+ 'username_field': 'UserName',
+ 'password_field': 'UserPassword',
+ },
+ 'Comcast_SSO': {
+ 'name': 'Comcast XFINITY',
+ 'username_field': 'user',
+ 'password_field': 'passwd',
+ },
+ 'TWC': {
+ 'name': 'Time Warner Cable | Spectrum',
+ 'username_field': 'Ecom_User_ID',
+ 'password_field': 'Ecom_Password',
+ },
+ 'Brighthouse': {
+ 'name': 'Bright House Networks | Spectrum',
+ 'username_field': 'j_username',
+ 'password_field': 'j_password',
+ },
+ 'Charter_Direct': {
+ 'name': 'Charter Spectrum',
+ 'username_field': 'IDToken1',
+ 'password_field': 'IDToken2',
+ },
+ 'Verizon': {
+ 'name': 'Verizon FiOS',
+ 'username_field': 'IDToken1',
+ 'password_field': 'IDToken2',
+ },
+ 'thr030': {
+ 'name': '3 Rivers Communications'
+ },
+ 'com140': {
+ 'name': 'Access Montana'
+ },
+ 'acecommunications': {
+ 'name': 'AcenTek'
+ },
+ 'acm010': {
+ 'name': 'Acme Communications'
+ },
+ 'ada020': {
+ 'name': 'Adams Cable Service'
+ },
+ 'alb020': {
+ 'name': 'Albany Mutual Telephone'
+ },
+ 'algona': {
+ 'name': 'Algona Municipal Utilities'
+ },
+ 'allwest': {
+ 'name': 'All West Communications'
+ },
+ 'all025': {
+ 'name': 'Allen\'s Communications'
+ },
+ 'spl010': {
+ 'name': 'Alliance Communications'
+ },
+ 'all070': {
+ 'name': 'ALLO Communications'
+ },
+ 'alpine': {
+ 'name': 'Alpine Communications'
+ },
+ 'hun015': {
+ 'name': 'American Broadband'
+ },
+ 'nwc010': {
+ 'name': 'American Broadband Missouri'
+ },
+ 'com130-02': {
+ 'name': 'American Community Networks'
+ },
+ 'com130-01': {
+ 'name': 'American Warrior Networks'
+ },
+ 'tom020': {
+ 'name': 'Amherst Telephone/Tomorrow Valley'
+ },
+ 'tvc020': {
+ 'name': 'Andycable'
+ },
+ 'arkwest': {
+ 'name': 'Arkwest Communications'
+ },
+ 'art030': {
+ 'name': 'Arthur Mutual Telephone Company'
+ },
+ 'arvig': {
+ 'name': 'Arvig'
+ },
+ 'nttcash010': {
+ 'name': 'Ashland Home Net'
+ },
+ 'astound': {
+ 'name': 'Astound (now Wave)'
+ },
+ 'dix030': {
+ 'name': 'ATC Broadband'
+ },
+ 'ara010': {
+ 'name': 'ATC Communications'
+ },
+ 'she030-02': {
+ 'name': 'Ayersville Communications'
+ },
+ 'baldwin': {
+ 'name': 'Baldwin Lightstream'
+ },
+ 'bal040': {
+ 'name': 'Ballard TV'
+ },
+ 'cit025': {
+ 'name': 'Bardstown Cable TV'
+ },
+ 'bay030': {
+ 'name': 'Bay Country Communications'
+ },
+ 'tel095': {
+ 'name': 'Beaver Creek Cooperative Telephone'
+ },
+ 'bea020': {
+ 'name': 'Beaver Valley Cable'
+ },
+ 'bee010': {
+ 'name': 'Bee Line Cable'
+ },
+ 'wir030': {
+ 'name': 'Beehive Broadband'
+ },
+ 'bra020': {
+ 'name': 'BELD'
+ },
+ 'bel020': {
+ 'name': 'Bellevue Municipal Cable'
+ },
+ 'vol040-01': {
+ 'name': 'Ben Lomand Connect / BLTV'
+ },
+ 'bev010': {
+ 'name': 'BEVCOMM'
+ },
+ 'big020': {
+ 'name': 'Big Sandy Broadband'
+ },
+ 'ble020': {
+ 'name': 'Bledsoe Telephone Cooperative'
+ },
+ 'bvt010': {
+ 'name': 'Blue Valley Tele-Communications'
+ },
+ 'bra050': {
+ 'name': 'Brandenburg Telephone Co.'
+ },
+ 'bte010': {
+ 'name': 'Bristol Tennessee Essential Services'
+ },
+ 'annearundel': {
+ 'name': 'Broadstripe'
+ },
+ 'btc010': {
+ 'name': 'BTC Communications'
+ },
+ 'btc040': {
+ 'name': 'BTC Vision - Nahunta'
+ },
+ 'bul010': {
+ 'name': 'Bulloch Telephone Cooperative'
+ },
+ 'but010': {
+ 'name': 'Butler-Bremer Communications'
+ },
+ 'tel160-csp': {
+ 'name': 'C Spire SNAP'
+ },
+ 'csicable': {
+ 'name': 'Cable Services Inc.'
+ },
+ 'cableamerica': {
+ 'name': 'CableAmerica'
+ },
+ 'cab038': {
+ 'name': 'CableSouth Media 3'
+ },
+ 'weh010-camtel': {
+ 'name': 'Cam-Tel Company'
+ },
+ 'car030': {
+ 'name': 'Cameron Communications'
+ },
+ 'canbytel': {
+ 'name': 'Canby Telcom'
+ },
+ 'crt020': {
+ 'name': 'CapRock Tv'
+ },
+ 'car050': {
+ 'name': 'Carnegie Cable'
+ },
+ 'cas': {
+ 'name': 'CAS Cable'
+ },
+ 'casscomm': {
+ 'name': 'CASSCOMM'
+ },
+ 'mid180-02': {
+ 'name': 'Catalina Broadband Solutions'
+ },
+ 'cccomm': {
+ 'name': 'CC Communications'
+ },
+ 'nttccde010': {
+ 'name': 'CDE Lightband'
+ },
+ 'cfunet': {
+ 'name': 'Cedar Falls Utilities'
+ },
+ 'dem010-01': {
+ 'name': 'Celect-Bloomer Telephone Area'
+ },
+ 'dem010-02': {
+ 'name': 'Celect-Bruce Telephone Area'
+ },
+ 'dem010-03': {
+ 'name': 'Celect-Citizens Connected Area'
+ },
+ 'dem010-04': {
+ 'name': 'Celect-Elmwood/Spring Valley Area'
+ },
+ 'dem010-06': {
+ 'name': 'Celect-Mosaic Telecom'
+ },
+ 'dem010-05': {
+ 'name': 'Celect-West WI Telephone Area'
+ },
+ 'net010-02': {
+ 'name': 'Cellcom/Nsight Telservices'
+ },
+ 'cen100': {
+ 'name': 'CentraCom'
+ },
+ 'nttccst010': {
+ 'name': 'Central Scott / CSTV'
+ },
+ 'cha035': {
+ 'name': 'Chaparral CableVision'
+ },
+ 'cha050': {
+ 'name': 'Chariton Valley Communication Corporation, Inc.'
+ },
+ 'cha060': {
+ 'name': 'Chatmoss Cablevision'
+ },
+ 'nttcche010': {
+ 'name': 'Cherokee Communications'
+ },
+ 'che050': {
+ 'name': 'Chesapeake Bay Communications'
+ },
+ 'cimtel': {
+ 'name': 'Cim-Tel Cable, LLC.'
+ },
+ 'cit180': {
+ 'name': 'Citizens Cablevision - Floyd, VA'
+ },
+ 'cit210': {
+ 'name': 'Citizens Cablevision, Inc.'
+ },
+ 'cit040': {
+ 'name': 'Citizens Fiber'
+ },
+ 'cit250': {
+ 'name': 'Citizens Mutual'
+ },
+ 'war040': {
+ 'name': 'Citizens Telephone Corporation'
+ },
+ 'wat025': {
+ 'name': 'City Of Monroe'
+ },
+ 'wadsworth': {
+ 'name': 'CityLink'
+ },
+ 'nor100': {
+ 'name': 'CL Tel'
+ },
+ 'cla010': {
+ 'name': 'Clarence Telephone and Cedar Communications'
+ },
+ 'ser060': {
+ 'name': 'Clear Choice Communications'
+ },
+ 'tac020': {
+ 'name': 'Click! Cable TV'
+ },
+ 'war020': {
+ 'name': 'CLICK1.NET'
+ },
+ 'cml010': {
+ 'name': 'CML Telephone Cooperative Association'
+ },
+ 'cns': {
+ 'name': 'CNS'
+ },
+ 'com160': {
+ 'name': 'Co-Mo Connect'
+ },
+ 'coa020': {
+ 'name': 'Coast Communications'
+ },
+ 'coa030': {
+ 'name': 'Coaxial Cable TV'
+ },
+ 'mid055': {
+ 'name': 'Cobalt TV (Mid-State Community TV)'
+ },
+ 'col070': {
+ 'name': 'Columbia Power & Water Systems'
+ },
+ 'col080': {
+ 'name': 'Columbus Telephone'
+ },
+ 'nor105': {
+ 'name': 'Communications 1 Cablevision, Inc.'
+ },
+ 'com150': {
+ 'name': 'Community Cable & Broadband'
+ },
+ 'com020': {
+ 'name': 'Community Communications Company'
+ },
+ 'coy010': {
+ 'name': 'commZoom'
+ },
+ 'com025': {
+ 'name': 'Complete Communication Services'
+ },
+ 'cat020': {
+ 'name': 'Comporium'
+ },
+ 'com071': {
+ 'name': 'ComSouth Telesys'
+ },
+ 'consolidatedcable': {
+ 'name': 'Consolidated'
+ },
+ 'conwaycorp': {
+ 'name': 'Conway Corporation'
+ },
+ 'coo050': {
+ 'name': 'Coon Valley Telecommunications Inc'
+ },
+ 'coo080': {
+ 'name': 'Cooperative Telephone Company'
+ },
+ 'cpt010': {
+ 'name': 'CP-TEL'
+ },
+ 'cra010': {
+ 'name': 'Craw-Kan Telephone'
+ },
+ 'crestview': {
+ 'name': 'Crestview Cable Communications'
+ },
+ 'cross': {
+ 'name': 'Cross TV'
+ },
+ 'cro030': {
+ 'name': 'Crosslake Communications'
+ },
+ 'ctc040': {
+ 'name': 'CTC - Brainerd MN'
+ },
+ 'phe030': {
+ 'name': 'CTV-Beam - East Alabama'
+ },
+ 'cun010': {
+ 'name': 'Cunningham Telephone & Cable'
+ },
+ 'dpc010': {
+ 'name': 'D & P Communications'
+ },
+ 'dak030': {
+ 'name': 'Dakota Central Telecommunications'
+ },
+ 'nttcdel010': {
+ 'name': 'Delcambre Telephone LLC'
+ },
+ 'tel160-del': {
+ 'name': 'Delta Telephone Company'
+ },
+ 'sal040': {
+ 'name': 'DiamondNet'
+ },
+ 'ind060-dc': {
+ 'name': 'Direct Communications'
+ },
+ 'doy010': {
+ 'name': 'Doylestown Cable TV'
+ },
+ 'dic010': {
+ 'name': 'DRN'
+ },
+ 'dtc020': {
+ 'name': 'DTC'
+ },
+ 'dtc010': {
+ 'name': 'DTC Cable (Delhi)'
+ },
+ 'dum010': {
+ 'name': 'Dumont Telephone Company'
+ },
+ 'dun010': {
+ 'name': 'Dunkerton Telephone Cooperative'
+ },
+ 'cci010': {
+ 'name': 'Duo County Telecom'
+ },
+ 'eagle': {
+ 'name': 'Eagle Communications'
+ },
+ 'weh010-east': {
+ 'name': 'East Arkansas Cable TV'
+ },
+ 'eatel': {
+ 'name': 'EATEL Video, LLC'
+ },
+ 'ell010': {
+ 'name': 'ECTA'
+ },
+ 'emerytelcom': {
+ 'name': 'Emery Telcom Video LLC'
+ },
+ 'nor200': {
+ 'name': 'Empire Access'
+ },
+ 'endeavor': {
+ 'name': 'Endeavor Communications'
+ },
+ 'sun045': {
+ 'name': 'Enhanced Telecommunications Corporation'
+ },
+ 'mid030': {
+ 'name': 'enTouch'
+ },
+ 'epb020': {
+ 'name': 'EPB Smartnet'
+ },
+ 'jea010': {
+ 'name': 'EPlus Broadband'
+ },
+ 'com065': {
+ 'name': 'ETC'
+ },
+ 'ete010': {
+ 'name': 'Etex Communications'
+ },
+ 'fbc-tele': {
+ 'name': 'F&B Communications'
+ },
+ 'fal010': {
+ 'name': 'Falcon Broadband'
+ },
+ 'fam010': {
+ 'name': 'FamilyView CableVision'
+ },
+ 'far020': {
+ 'name': 'Farmers Mutual Telephone Company'
+ },
+ 'fay010': {
+ 'name': 'Fayetteville Public Utilities'
+ },
+ 'sal060': {
+ 'name': 'fibrant'
+ },
+ 'fid010': {
+ 'name': 'Fidelity Communications'
+ },
+ 'for030': {
+ 'name': 'FJ Communications'
+ },
+ 'fli020': {
+ 'name': 'Flint River Communications'
+ },
+ 'far030': {
+ 'name': 'FMT - Jesup'
+ },
+ 'foo010': {
+ 'name': 'Foothills Communications'
+ },
+ 'for080': {
+ 'name': 'Forsyth CableNet'
+ },
+ 'fbcomm': {
+ 'name': 'Frankfort Plant Board'
+ },
+ 'tel160-fra': {
+ 'name': 'Franklin Telephone Company'
+ },
+ 'nttcftc010': {
+ 'name': 'FTC'
+ },
+ 'fullchannel': {
+ 'name': 'Full Channel, Inc.'
+ },
+ 'gar040': {
+ 'name': 'Gardonville Cooperative Telephone Association'
+ },
+ 'gbt010': {
+ 'name': 'GBT Communications, Inc.'
+ },
+ 'tec010': {
+ 'name': 'Genuine Telecom'
+ },
+ 'clr010': {
+ 'name': 'Giant Communications'
+ },
+ 'gla010': {
+ 'name': 'Glasgow EPB'
+ },
+ 'gle010': {
+ 'name': 'Glenwood Telecommunications'
+ },
+ 'gra060': {
+ 'name': 'GLW Broadband Inc.'
+ },
+ 'goldenwest': {
+ 'name': 'Golden West Cablevision'
+ },
+ 'vis030': {
+ 'name': 'Grantsburg Telcom'
+ },
+ 'gpcom': {
+ 'name': 'Great Plains Communications'
+ },
+ 'gri010': {
+ 'name': 'Gridley Cable Inc'
+ },
+ 'hbc010': {
+ 'name': 'H&B Cable Services'
+ },
+ 'hae010': {
+ 'name': 'Haefele TV Inc.'
+ },
+ 'htc010': {
+ 'name': 'Halstad Telephone Company'
+ },
+ 'har005': {
+ 'name': 'Harlan Municipal Utilities'
+ },
+ 'har020': {
+ 'name': 'Hart Communications'
+ },
+ 'ced010': {
+ 'name': 'Hartelco TV'
+ },
+ 'hea040': {
+ 'name': 'Heart of Iowa Communications Cooperative'
+ },
+ 'htc020': {
+ 'name': 'Hickory Telephone Company'
+ },
+ 'nttchig010': {
+ 'name': 'Highland Communication Services'
+ },
+ 'hig030': {
+ 'name': 'Highland Media'
+ },
+ 'spc010': {
+ 'name': 'Hilliary Communications'
+ },
+ 'hin020': {
+ 'name': 'Hinton CATV Co.'
+ },
+ 'hometel': {
+ 'name': 'HomeTel Entertainment, Inc.'
+ },
+ 'hoodcanal': {
+ 'name': 'Hood Canal Communications'
+ },
+ 'weh010-hope': {
+ 'name': 'Hope - Prescott Cable TV'
+ },
+ 'horizoncable': {
+ 'name': 'Horizon Cable TV, Inc.'
+ },
+ 'hor040': {
+ 'name': 'Horizon Chillicothe Telephone'
+ },
+ 'htc030': {
+ 'name': 'HTC Communications Co. - IL'
+ },
+ 'htccomm': {
+ 'name': 'HTC Communications, Inc. - IA'
+ },
+ 'wal005': {
+ 'name': 'Huxley Communications'
+ },
+ 'imon': {
+ 'name': 'ImOn Communications'
+ },
+ 'ind040': {
+ 'name': 'Independence Telecommunications'
+ },
+ 'rrc010': {
+ 'name': 'Inland Networks'
+ },
+ 'stc020': {
+ 'name': 'Innovative Cable TV St Croix'
+ },
+ 'car100': {
+ 'name': 'Innovative Cable TV St Thomas-St John'
+ },
+ 'icc010': {
+ 'name': 'Inside Connect Cable'
+ },
+ 'int100': {
+ 'name': 'Integra Telecom'
+ },
+ 'int050': {
+ 'name': 'Interstate Telecommunications Coop'
+ },
+ 'irv010': {
+ 'name': 'Irvine Cable'
+ },
+ 'k2c010': {
+ 'name': 'K2 Communications'
+ },
+ 'kal010': {
+ 'name': 'Kalida Telephone Company, Inc.'
+ },
+ 'kal030': {
+ 'name': 'Kalona Cooperative Telephone Company'
+ },
+ 'kmt010': {
+ 'name': 'KMTelecom'
+ },
+ 'kpu010': {
+ 'name': 'KPU Telecommunications'
+ },
+ 'kuh010': {
+ 'name': 'Kuhn Communications, Inc.'
+ },
+ 'lak130': {
+ 'name': 'Lakeland Communications'
+ },
+ 'lan010': {
+ 'name': 'Langco'
+ },
+ 'lau020': {
+ 'name': 'Laurel Highland Total Communications, Inc.'
+ },
+ 'leh010': {
+ 'name': 'Lehigh Valley Cooperative Telephone'
+ },
+ 'bra010': {
+ 'name': 'Limestone Cable/Bracken Cable'
+ },
+ 'loc020': {
+ 'name': 'LISCO'
+ },
+ 'lit020': {
+ 'name': 'Litestream'
+ },
+ 'tel140': {
+ 'name': 'LivCom'
+ },
+ 'loc010': {
+ 'name': 'LocalTel Communications'
+ },
+ 'weh010-longview': {
+ 'name': 'Longview - Kilgore Cable TV'
+ },
+ 'lon030': {
+ 'name': 'Lonsdale Video Ventures, LLC'
+ },
+ 'lns010': {
+ 'name': 'Lost Nation-Elwood Telephone Co.'
+ },
+ 'nttclpc010': {
+ 'name': 'LPC Connect'
+ },
+ 'lumos': {
+ 'name': 'Lumos Networks'
+ },
+ 'madison': {
+ 'name': 'Madison Communications'
+ },
+ 'mad030': {
+ 'name': 'Madison County Cable Inc.'
+ },
+ 'nttcmah010': {
+ 'name': 'Mahaska Communication Group'
+ },
+ 'mar010': {
+ 'name': 'Marne & Elk Horn Telephone Company'
+ },
+ 'mcc040': {
+ 'name': 'McClure Telephone Co.'
+ },
+ 'mctv': {
+ 'name': 'MCTV'
+ },
+ 'merrimac': {
+ 'name': 'Merrimac Communications Ltd.'
+ },
+ 'metronet': {
+ 'name': 'Metronet'
+ },
+ 'mhtc': {
+ 'name': 'MHTC'
+ },
+ 'midhudson': {
+ 'name': 'Mid-Hudson Cable'
+ },
+ 'midrivers': {
+ 'name': 'Mid-Rivers Communications'
+ },
+ 'mid045': {
+ 'name': 'Midstate Communications'
+ },
+ 'mil080': {
+ 'name': 'Milford Communications'
+ },
+ 'min030': {
+ 'name': 'MINET'
+ },
+ 'nttcmin010': {
+ 'name': 'Minford TV'
+ },
+ 'san040-02': {
+ 'name': 'Mitchell Telecom'
+ },
+ 'mlg010': {
+ 'name': 'MLGC'
+ },
+ 'mon060': {
+ 'name': 'Mon-Cre TVE'
+ },
+ 'mou110': {
+ 'name': 'Mountain Telephone'
+ },
+ 'mou050': {
+ 'name': 'Mountain Village Cable'
+ },
+ 'mtacomm': {
+ 'name': 'MTA Communications, LLC'
+ },
+ 'mtc010': {
+ 'name': 'MTC Cable'
+ },
+ 'med040': {
+ 'name': 'MTC Technologies'
+ },
+ 'man060': {
+ 'name': 'MTCC'
+ },
+ 'mtc030': {
+ 'name': 'MTCO Communications'
+ },
+ 'mul050': {
+ 'name': 'Mulberry Telecommunications'
+ },
+ 'mur010': {
+ 'name': 'Murray Electric System'
+ },
+ 'musfiber': {
+ 'name': 'MUS FiberNET'
+ },
+ 'mpw': {
+ 'name': 'Muscatine Power & Water'
+ },
+ 'nttcsli010': {
+ 'name': 'myEVTV.com'
+ },
+ 'nor115': {
+ 'name': 'NCC'
+ },
+ 'nor260': {
+ 'name': 'NDTC'
+ },
+ 'nctc': {
+ 'name': 'Nebraska Central Telecom, Inc.'
+ },
+ 'nel020': {
+ 'name': 'Nelsonville TV Cable'
+ },
+ 'nem010': {
+ 'name': 'Nemont'
+ },
+ 'new075': {
+ 'name': 'New Hope Telephone Cooperative'
+ },
+ 'nor240': {
+ 'name': 'NICP'
+ },
+ 'cic010': {
+ 'name': 'NineStar Connect'
+ },
+ 'nktelco': {
+ 'name': 'NKTelco'
+ },
+ 'nortex': {
+ 'name': 'Nortex Communications'
+ },
+ 'nor140': {
+ 'name': 'North Central Telephone Cooperative'
+ },
+ 'nor030': {
+ 'name': 'Northland Communications'
+ },
+ 'nor075': {
+ 'name': 'Northwest Communications'
+ },
+ 'nor125': {
+ 'name': 'Norwood Light Broadband'
+ },
+ 'net010': {
+ 'name': 'Nsight Telservices'
+ },
+ 'dur010': {
+ 'name': 'Ntec'
+ },
+ 'nts010': {
+ 'name': 'NTS Communications'
+ },
+ 'new045': {
+ 'name': 'NU-Telecom'
+ },
+ 'nulink': {
+ 'name': 'NuLink'
+ },
+ 'jam030': {
+ 'name': 'NVC'
+ },
+ 'far035': {
+ 'name': 'OmniTel Communications'
+ },
+ 'onesource': {
+ 'name': 'OneSource Communications'
+ },
+ 'cit230': {
+ 'name': 'Opelika Power Services'
+ },
+ 'daltonutilities': {
+ 'name': 'OptiLink'
+ },
+ 'mid140': {
+ 'name': 'OPTURA'
+ },
+ 'ote010': {
+ 'name': 'OTEC Communication Company'
+ },
+ 'cci020': {
+ 'name': 'Packerland Broadband'
+ },
+ 'pan010': {
+ 'name': 'Panora Telco/Guthrie Center Communications'
+ },
+ 'otter': {
+ 'name': 'Park Region Telephone & Otter Tail Telcom'
+ },
+ 'mid050': {
+ 'name': 'Partner Communications Cooperative'
+ },
+ 'fib010': {
+ 'name': 'Pathway'
+ },
+ 'paulbunyan': {
+ 'name': 'Paul Bunyan Communications'
+ },
+ 'pem020': {
+ 'name': 'Pembroke Telephone Company'
+ },
+ 'mck010': {
+ 'name': 'Peoples Rural Telephone Cooperative'
+ },
+ 'pul010': {
+ 'name': 'PES Energize'
+ },
+ 'phi010': {
+ 'name': 'Philippi Communications System'
+ },
+ 'phonoscope': {
+ 'name': 'Phonoscope Cable'
+ },
+ 'pin070': {
+ 'name': 'Pine Belt Communications, Inc.'
+ },
+ 'weh010-pine': {
+ 'name': 'Pine Bluff Cable TV'
+ },
+ 'pin060': {
+ 'name': 'Pineland Telephone Cooperative'
+ },
+ 'cam010': {
+ 'name': 'Pinpoint Communications'
+ },
+ 'pio060': {
+ 'name': 'Pioneer Broadband'
+ },
+ 'pioncomm': {
+ 'name': 'Pioneer Communications'
+ },
+ 'pioneer': {
+ 'name': 'Pioneer DTV'
+ },
+ 'pla020': {
+ 'name': 'Plant TiftNet, Inc.'
+ },
+ 'par010': {
+ 'name': 'PLWC'
+ },
+ 'pro035': {
+ 'name': 'PMT'
+ },
+ 'vik011': {
+ 'name': 'Polar Cablevision'
+ },
+ 'pottawatomie': {
+ 'name': 'Pottawatomie Telephone Co.'
+ },
+ 'premiercomm': {
+ 'name': 'Premier Communications'
+ },
+ 'psc010': {
+ 'name': 'PSC'
+ },
+ 'pan020': {
+ 'name': 'PTCI'
+ },
+ 'qco010': {
+ 'name': 'QCOL'
+ },
+ 'qua010': {
+ 'name': 'Quality Cablevision'
+ },
+ 'rad010': {
+ 'name': 'Radcliffe Telephone Company'
+ },
+ 'car040': {
+ 'name': 'Rainbow Communications'
+ },
+ 'rai030': {
+ 'name': 'Rainier Connect'
+ },
+ 'ral010': {
+ 'name': 'Ralls Technologies'
+ },
+ 'rct010': {
+ 'name': 'RC Technologies'
+ },
+ 'red040': {
+ 'name': 'Red River Communications'
+ },
+ 'ree010': {
+ 'name': 'Reedsburg Utility Commission'
+ },
+ 'mol010': {
+ 'name': 'Reliance Connects- Oregon'
+ },
+ 'res020': {
+ 'name': 'Reserve Telecommunications'
+ },
+ 'weh010-resort': {
+ 'name': 'Resort TV Cable'
+ },
+ 'rld010': {
+ 'name': 'Richland Grant Telephone Cooperative, Inc.'
+ },
+ 'riv030': {
+ 'name': 'River Valley Telecommunications Coop'
+ },
+ 'rockportcable': {
+ 'name': 'Rock Port Cablevision'
+ },
+ 'rsf010': {
+ 'name': 'RS Fiber'
+ },
+ 'rtc': {
+ 'name': 'RTC Communication Corp'
+ },
+ 'res040': {
+ 'name': 'RTC-Reservation Telephone Coop.'
+ },
+ 'rte010': {
+ 'name': 'RTEC Communications'
+ },
+ 'stc010': {
+ 'name': 'S&T'
+ },
+ 'san020': {
+ 'name': 'San Bruno Cable TV'
+ },
+ 'san040-01': {
+ 'name': 'Santel'
+ },
+ 'sav010': {
+ 'name': 'SCI Broadband-Savage Communications Inc.'
+ },
+ 'sco050': {
+ 'name': 'Scottsboro Electric Power Board'
+ },
+ 'scr010': {
+ 'name': 'Scranton Telephone Company'
+ },
+ 'selco': {
+ 'name': 'SELCO'
+ },
+ 'she010': {
+ 'name': 'Shentel'
+ },
+ 'she030': {
+ 'name': 'Sherwood Mutual Telephone Association, Inc.'
+ },
+ 'ind060-ssc': {
+ 'name': 'Silver Star Communications'
+ },
+ 'sjoberg': {
+ 'name': 'Sjoberg\'s Inc.'
+ },
+ 'sou025': {
+ 'name': 'SKT'
+ },
+ 'sky050': {
+ 'name': 'SkyBest TV'
+ },
+ 'nttcsmi010': {
+ 'name': 'Smithville Communications'
+ },
+ 'woo010': {
+ 'name': 'Solarus'
+ },
+ 'sou075': {
+ 'name': 'South Central Rural Telephone Cooperative'
+ },
+ 'sou065': {
+ 'name': 'South Holt Cablevision, Inc.'
+ },
+ 'sou035': {
+ 'name': 'South Slope Cooperative Communications'
+ },
+ 'spa020': {
+ 'name': 'Spanish Fork Community Network'
+ },
+ 'spe010': {
+ 'name': 'Spencer Municipal Utilities'
+ },
+ 'spi005': {
+ 'name': 'Spillway Communications, Inc.'
+ },
+ 'srt010': {
+ 'name': 'SRT'
+ },
+ 'cccsmc010': {
+ 'name': 'St. Maarten Cable TV'
+ },
+ 'sta025': {
+ 'name': 'Star Communications'
+ },
+ 'sco020': {
+ 'name': 'STE'
+ },
+ 'uin010': {
+ 'name': 'STRATA Networks'
+ },
+ 'sum010': {
+ 'name': 'Sumner Cable TV'
+ },
+ 'pie010': {
+ 'name': 'Surry TV/PCSI TV'
+ },
+ 'swa010': {
+ 'name': 'Swayzee Communications'
+ },
+ 'sweetwater': {
+ 'name': 'Sweetwater Cable Television Co'
+ },
+ 'weh010-talequah': {
+ 'name': 'Tahlequah Cable TV'
+ },
+ 'tct': {
+ 'name': 'TCT'
+ },
+ 'tel050': {
+ 'name': 'Tele-Media Company'
+ },
+ 'com050': {
+ 'name': 'The Community Agency'
+ },
+ 'thr020': {
+ 'name': 'Three River'
+ },
+ 'cab140': {
+ 'name': 'Town & Country Technologies'
+ },
+ 'tra010': {
+ 'name': 'Trans-Video'
+ },
+ 'tre010': {
+ 'name': 'Trenton TV Cable Company'
+ },
+ 'tcc': {
+ 'name': 'Tri County Communications Cooperative'
+ },
+ 'tri025': {
+ 'name': 'TriCounty Telecom'
+ },
+ 'tri110': {
+ 'name': 'TrioTel Communications, Inc.'
+ },
+ 'tro010': {
+ 'name': 'Troy Cablevision, Inc.'
+ },
+ 'tsc': {
+ 'name': 'TSC'
+ },
+ 'cit220': {
+ 'name': 'Tullahoma Utilities Board'
+ },
+ 'tvc030': {
+ 'name': 'TV Cable of Rensselaer'
+ },
+ 'tvc015': {
+ 'name': 'TVC Cable'
+ },
+ 'cab180': {
+ 'name': 'TVision'
+ },
+ 'twi040': {
+ 'name': 'Twin Lakes'
+ },
+ 'tvtinc': {
+ 'name': 'Twin Valley'
+ },
+ 'uis010': {
+ 'name': 'Union Telephone Company'
+ },
+ 'uni110': {
+ 'name': 'United Communications - TN'
+ },
+ 'uni120': {
+ 'name': 'United Services'
+ },
+ 'uss020': {
+ 'name': 'US Sonet'
+ },
+ 'cab060': {
+ 'name': 'USA Communications'
+ },
+ 'she005': {
+ 'name': 'USA Communications/Shellsburg, IA'
+ },
+ 'val040': {
+ 'name': 'Valley TeleCom Group'
+ },
+ 'val025': {
+ 'name': 'Valley Telecommunications'
+ },
+ 'val030': {
+ 'name': 'Valparaiso Broadband'
+ },
+ 'cla050': {
+ 'name': 'Vast Broadband'
+ },
+ 'sul015': {
+ 'name': 'Venture Communications Cooperative, Inc.'
+ },
+ 'ver025': {
+ 'name': 'Vernon Communications Co-op'
+ },
+ 'weh010-vicksburg': {
+ 'name': 'Vicksburg Video'
+ },
+ 'vis070': {
+ 'name': 'Vision Communications'
+ },
+ 'volcanotel': {
+ 'name': 'Volcano Vision, Inc.'
+ },
+ 'vol040-02': {
+ 'name': 'VolFirst / BLTV'
+ },
+ 'ver070': {
+ 'name': 'VTel'
+ },
+ 'nttcvtx010': {
+ 'name': 'VTX1'
+ },
+ 'bci010-02': {
+ 'name': 'Vyve Broadband'
+ },
+ 'wab020': {
+ 'name': 'Wabash Mutual Telephone'
+ },
+ 'waitsfield': {
+ 'name': 'Waitsfield Cable'
+ },
+ 'wal010': {
+ 'name': 'Walnut Communications'
+ },
+ 'wavebroadband': {
+ 'name': 'Wave'
+ },
+ 'wav030': {
+ 'name': 'Waverly Communications Utility'
+ },
+ 'wbi010': {
+ 'name': 'WBI'
+ },
+ 'web020': {
+ 'name': 'Webster-Calhoun Cooperative Telephone Association'
+ },
+ 'wes005': {
+ 'name': 'West Alabama TV Cable'
+ },
+ 'carolinata': {
+ 'name': 'West Carolina Communications'
+ },
+ 'wct010': {
+ 'name': 'West Central Telephone Association'
+ },
+ 'wes110': {
+ 'name': 'West River Cooperative Telephone Company'
+ },
+ 'ani030': {
+ 'name': 'WesTel Systems'
+ },
+ 'westianet': {
+ 'name': 'Western Iowa Networks'
+ },
+ 'nttcwhi010': {
+ 'name': 'Whidbey Telecom'
+ },
+ 'weh010-white': {
+ 'name': 'White County Cable TV'
+ },
+ 'wes130': {
+ 'name': 'Wiatel'
+ },
+ 'wik010': {
+ 'name': 'Wiktel'
+ },
+ 'wil070': {
+ 'name': 'Wilkes Communications, Inc./RiverStreet Networks'
+ },
+ 'wil015': {
+ 'name': 'Wilson Communications'
+ },
+ 'win010': {
+ 'name': 'Windomnet/SMBS'
+ },
+ 'win090': {
+ 'name': 'Windstream Cable TV'
+ },
+ 'wcta': {
+ 'name': 'Winnebago Cooperative Telecom Association'
+ },
+ 'wtc010': {
+ 'name': 'WTC'
+ },
+ 'wil040': {
+ 'name': 'WTC Communications, Inc.'
+ },
+ 'wya010': {
+ 'name': 'Wyandotte Cable'
+ },
+ 'hin020-02': {
+ 'name': 'X-Stream Services'
+ },
+ 'xit010': {
+ 'name': 'XIT Communications'
+ },
+ 'yel010': {
+ 'name': 'Yelcot Communications'
+ },
+ 'mid180-01': {
+ 'name': 'yondoo'
+ },
+ 'cou060': {
+ 'name': 'Zito Media'
+ },
+}
+
+
+class AdobePassIE(InfoExtractor):
+ _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
+ _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
+ _MVPD_CACHE = 'ap-mvpd'
+
+ _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
+
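+ # Route every request through geo_verification_headers() so geo bypass
+ # (typically a spoofed X-Forwarded-For header) applies to the whole
+ # Adobe Pass login flow, not just the initial page.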
+ def _download_webpage_handle(self, *args, **kwargs):
+ headers = self.geo_verification_headers()
+ headers.update(kwargs.get('headers', {}))
+ kwargs['headers'] = headers
+ return super(AdobePassIE, self)._download_webpage_handle(
+ *args, **compat_kwargs(kwargs))
+
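+ # Adobe Pass identifies the requested video with an MRSS snippet: the
+ # channel title carries the provider ID, while the item carries the
+ # video title, guid and V-chip rating.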
+ @staticmethod
+ def _get_mvpd_resource(provider_id, title, guid, rating):
+ channel = etree.Element('channel')
+ channel_title = etree.SubElement(channel, 'title')
+ channel_title.text = provider_id
+ item = etree.SubElement(channel, 'item')
+ resource_title = etree.SubElement(item, 'title')
+ resource_title.text = title
+ resource_guid = etree.SubElement(item, 'guid')
+ resource_guid.text = guid
+ resource_rating = etree.SubElement(item, 'media:rating')
+ resource_rating.attrib = {'scheme': 'urn:v-chip'}
+ resource_rating.text = rating
+ return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>'
+
+ def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
+ def xml_text(xml_str, tag):
+ return self._search_regex(
+ '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
+
+ def is_expired(token, date_ele):
+ token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
+ return token_expires and token_expires <= int(time.time())
+
+ def post_form(form_page_res, note, data={}):
+ form_page, urlh = form_page_res
+ post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
+ if not re.match(r'https?://', post_url):
+ post_url = compat_urlparse.urljoin(urlh.geturl(), post_url)
+ form_data = self._hidden_inputs(form_page)
+ form_data.update(data)
+ return self._download_webpage_handle(
+ post_url, video_id, note, data=urlencode_postdata(form_data), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+
+ def raise_mvpd_required():
+ raise ExtractorError(
+ 'This video is only available for users of participating TV providers. '
+ 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
+ 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
+
+ def extract_redirect_url(html, url=None, fatal=False):
+ # TODO: eliminate code duplication with generic extractor and move
+ # redirection code into _download_webpage_handle
+ REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
+ redirect_url = self._search_regex(
+ r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
+ r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
+ html, 'meta refresh redirect',
+ default=NO_DEFAULT if fatal else None, fatal=fatal)
+ if not redirect_url:
+ return None
+ if url:
+ redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url))
+ return redirect_url
+
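+ # The ap_* values mirror what the official Adobe Pass JS client sends;
+ # the service presumably checks them, so they are included with every
+ # MVPD request.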
+ mvpd_headers = {
+ 'ap_42': 'anonymous',
+ 'ap_11': 'Linux i686',
+ 'ap_z': self._USER_AGENT,
+ 'User-Agent': self._USER_AGENT,
+ }
+
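+ # `resource` is either a full MRSS document (as produced by
+ # _get_mvpd_resource) or a bare guid; the guid doubles as the cache key
+ # for the authorization token below.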
+ guid = xml_text(resource, 'guid') if '<' in resource else resource
+ count = 0
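+ # At most two passes: a <pendingLogout/> response means the cached
+ # tokens are stale, so the cache is wiped and the whole handshake is
+ # retried once from scratch.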
+ while count < 2:
+ requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {}
+ authn_token = requestor_info.get('authn_token')
+ if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
+ authn_token = None
+ if not authn_token:
+ # TODO add support for other TV Providers
+ mso_id = self._downloader.params.get('ap_mso')
+ if not mso_id:
+ raise_mvpd_required()
+ username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
+ if not username or not password:
+ raise_mvpd_required()
+ mso_info = MSO_INFO[mso_id]
+
+ provider_redirect_page_res = self._download_webpage_handle(
+ self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
+ 'Downloading Provider Redirect Page', query={
+ 'noflash': 'true',
+ 'mso_id': mso_id,
+ 'requestor_id': requestor_id,
+ 'no_iframe': 'false',
+ 'domain_name': 'adobe.com',
+ 'redirect_url': url,
+ })
+
+ if mso_id == 'Comcast_SSO':
+ # Comcast page flow varies by video site and whether you
+ # are on Comcast's network.
+ provider_redirect_page, urlh = provider_redirect_page_res
+ if 'automatically signing you in' in provider_redirect_page:
+ oauth_redirect_url = self._html_search_regex(
+ r'window\.location\s*=\s*[\'"]([^\'"]+)',
+ provider_redirect_page, 'oauth redirect')
+ self._download_webpage(
+ oauth_redirect_url, video_id, 'Confirming auto login')
+ else:
+ if '<form name="signin"' in provider_redirect_page:
+ provider_login_page_res = provider_redirect_page_res
+ elif 'http-equiv="refresh"' in provider_redirect_page:
+ oauth_redirect_url = extract_redirect_url(
+ provider_redirect_page, fatal=True)
+ provider_login_page_res = self._download_webpage_handle(
+ oauth_redirect_url, video_id,
+ self._DOWNLOADING_LOGIN_PAGE)
+ else:
+ provider_login_page_res = post_form(
+ provider_redirect_page_res,
+ self._DOWNLOADING_LOGIN_PAGE)
+
+ mvpd_confirm_page_res = post_form(
+ provider_login_page_res, 'Logging in', {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password,
+ })
+ mvpd_confirm_page, urlh = mvpd_confirm_page_res
+ if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
+ post_form(mvpd_confirm_page_res, 'Confirming Login')
+ elif mso_id == 'Verizon':
+ # In general, if you're connecting from a Verizon-assigned IP,
+ # you will not actually pass your credentials.
+ provider_redirect_page, urlh = provider_redirect_page_res
+ if 'Please wait ...' in provider_redirect_page:
+ saml_redirect_url = self._html_search_regex(
+ r'self\.parent\.location=(["\'])(?P<url>.+?)\1',
+ provider_redirect_page,
+ 'SAML Redirect URL', group='url')
+ saml_login_page = self._download_webpage(
+ saml_redirect_url, video_id,
+ 'Downloading SAML Login Page')
+ else:
+ saml_login_page_res = post_form(
+ provider_redirect_page_res, 'Logging in', {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password,
+ })
+ saml_login_page, urlh = saml_login_page_res
+ if 'Please try again.' in saml_login_page:
+ raise ExtractorError(
+ 'We\'re sorry, but either the User ID or Password entered is not correct.')
+ saml_login_url = self._search_regex(
+ r'xmlHttp\.open\("POST"\s*,\s*(["\'])(?P<url>.+?)\1',
+ saml_login_page, 'SAML Login URL', group='url')
+ saml_response_json = self._download_json(
+ saml_login_url, video_id, 'Downloading SAML Response',
+ headers={'Content-Type': 'text/xml'})
+ self._download_webpage(
+ saml_response_json['targetValue'], video_id,
+ 'Confirming Login', data=urlencode_postdata({
+ 'SAMLResponse': saml_response_json['SAMLResponse'],
+ 'RelayState': saml_response_json['RelayState']
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+ else:
+ # Some providers (e.g. DIRECTV NOW) have another meta-refresh-based
+ # redirect that should be followed.
+ provider_redirect_page, urlh = provider_redirect_page_res
+ provider_refresh_redirect_url = extract_redirect_url(
+ provider_redirect_page, url=urlh.geturl())
+ if provider_refresh_redirect_url:
+ provider_redirect_page_res = self._download_webpage_handle(
+ provider_refresh_redirect_url, video_id,
+ 'Downloading Provider Redirect Page (meta refresh)')
+ provider_login_page_res = post_form(
+ provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
+ mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
+ mso_info.get('username_field', 'username'): username,
+ mso_info.get('password_field', 'password'): password,
+ })
+ if mso_id != 'Rogers':
+ post_form(mvpd_confirm_page_res, 'Confirming Login')
+
+ session = self._download_webpage(
+ self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
+ 'Retrieving Session', data=urlencode_postdata({
+ '_method': 'GET',
+ 'requestor_id': requestor_id,
+ }), headers=mvpd_headers)
+ if '<pendingLogout' in session:
+ self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ count += 1
+ continue
+ authn_token = unescapeHTML(xml_text(session, 'authnToken'))
+ requestor_info['authn_token'] = authn_token
+ self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+
+ authz_token = requestor_info.get(guid)
+ if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
+ authz_token = None
+ if not authz_token:
+ authorize = self._download_webpage(
+ self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
+ 'Retrieving Authorization Token', data=urlencode_postdata({
+ 'resource_id': resource,
+ 'requestor_id': requestor_id,
+ 'authentication_token': authn_token,
+ 'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
+ 'userMeta': '1',
+ }), headers=mvpd_headers)
+ if '<pendingLogout' in authorize:
+ self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ count += 1
+ continue
+ if '<error' in authorize:
+ raise ExtractorError(xml_text(authorize, 'details'), expected=True)
+ authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
+ requestor_info[guid] = authz_token
+ self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+
+ mvpd_headers.update({
+ 'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
+ 'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
+ })
+
+ short_authorize = self._download_webpage(
+ self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
+ video_id, 'Retrieving Media Token', data=urlencode_postdata({
+ 'authz_token': authz_token,
+ 'requestor_id': requestor_id,
+ 'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
+ 'hashed_guid': 'false',
+ }), headers=mvpd_headers)
+ if '<pendingLogout' in short_authorize:
+ self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ count += 1
+ continue
+ return short_authorize
diff --git a/hypervideo_dl/extractor/adobetv.py b/hypervideo_dl/extractor/adobetv.py
new file mode 100644
index 0000000..80060f0
--- /dev/null
+++ b/hypervideo_dl/extractor/adobetv.py
@@ -0,0 +1,288 @@
+from __future__ import unicode_literals
+
+import functools
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ ISO639Utils,
+ OnDemandPagedList,
+ parse_duration,
+ str_or_none,
+ str_to_int,
+ unified_strdate,
+)
+
+
+class AdobeTVBaseIE(InfoExtractor):
+ def _call_api(self, path, video_id, query, note=None):
+ return self._download_json(
+ 'http://tv.adobe.com/api/v4/' + path,
+ video_id, note, query=query)['data']
+
+ def _parse_subtitles(self, video_data, url_key):
+ subtitles = {}
+ for translation in video_data.get('translations', []):
+ vtt_path = translation.get(url_key)
+ if not vtt_path:
+ continue
+ lang = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
+ subtitles.setdefault(lang, []).append({
+ 'ext': 'vtt',
+ 'url': vtt_path,
+ })
+ return subtitles
+
+ def _parse_video_data(self, video_data):
+ video_id = compat_str(video_data['id'])
+ title = video_data['title']
+
+ s3_extracted = False
+ formats = []
+ for source in video_data.get('videos', []):
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ f = {
+ 'format_id': source.get('quality_level'),
+ 'fps': int_or_none(source.get('frame_rate')),
+ 'height': int_or_none(source.get('height')),
+ 'tbr': int_or_none(source.get('video_data_rate')),
+ 'width': int_or_none(source.get('width')),
+ 'url': source_url,
+ }
+ original_filename = source.get('original_filename')
+ if original_filename:
+ if not (f.get('height') and f.get('width')):
+ mobj = re.search(r'_(\d+)x(\d+)', original_filename)
+ if mobj:
+ f.update({
+ 'height': int(mobj.group(2)),
+ 'width': int(mobj.group(1)),
+ })
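+ # If the API leaks the original upload's s3:// path, expose it once
+ # through the public S3 HTTP endpoint as a preferred 'original' format.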
+ if original_filename.startswith('s3://') and not s3_extracted:
+ formats.append({
+ 'format_id': 'original',
+ 'preference': 1,
+ 'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'),
+ })
+ s3_extracted = True
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnail'),
+ 'upload_date': unified_strdate(video_data.get('start_date')),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'view_count': str_to_int(video_data.get('playcount')),
+ 'formats': formats,
+ 'subtitles': self._parse_subtitles(video_data, 'vtt'),
+ }
+
+
+class AdobeTVEmbedIE(AdobeTVBaseIE):
+ IE_NAME = 'adobetv:embed'
+ _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://tv.adobe.com/embed/22/4153',
+ 'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
+ 'info_dict': {
+ 'id': '4153',
+ 'ext': 'flv',
+ 'title': 'Creating Graphics Optimized for BlackBerry',
+ 'description': 'md5:eac6e8dced38bdaae51cd94447927459',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'upload_date': '20091109',
+ 'duration': 377,
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video_data = self._call_api(
+ 'episode/' + video_id, video_id, {'disclosure': 'standard'})[0]
+ return self._parse_video_data(video_data)
+
+
+class AdobeTVIE(AdobeTVBaseIE):
+ IE_NAME = 'adobetv'
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
+ 'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
+ 'info_dict': {
+ 'id': '10981',
+ 'ext': 'mp4',
+ 'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
+ 'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'upload_date': '20110914',
+ 'duration': 60,
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ language, show_urlname, urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
+
+ video_data = self._call_api(
+ 'episode/get', urlname, {
+ 'disclosure': 'standard',
+ 'language': language,
+ 'show_urlname': show_urlname,
+ 'urlname': urlname,
+ })[0]
+ return self._parse_video_data(video_data)
+
+
+class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
+ _PAGE_SIZE = 25
+
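+ # Playlist pages are fetched lazily via OnDemandPagedList as entries
+ # are consumed; the API pages are 1-based, hence the page += 1 below.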
+ def _fetch_page(self, display_id, query, page):
+ page += 1
+ query['page'] = page
+ for element_data in self._call_api(
+ self._RESOURCE, display_id, query, 'Downloading Page %d' % page):
+ yield self._process_data(element_data)
+
+ def _extract_playlist_entries(self, display_id, query):
+ return OnDemandPagedList(functools.partial(
+ self._fetch_page, display_id, query), self._PAGE_SIZE)
+
+
+class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
+ IE_NAME = 'adobetv:show'
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
+ 'info_dict': {
+ 'id': '36',
+ 'title': 'The Complete Picture with Julieanne Kost',
+ 'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
+ },
+ 'playlist_mincount': 136,
+ }
+ _RESOURCE = 'episode'
+ _process_data = AdobeTVBaseIE._parse_video_data
+
+ def _real_extract(self, url):
+ language, show_urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
+ query = {
+ 'disclosure': 'standard',
+ 'language': language,
+ 'show_urlname': show_urlname,
+ }
+
+ show_data = self._call_api(
+ 'show/get', show_urlname, query)[0]
+
+ return self.playlist_result(
+ self._extract_playlist_entries(show_urlname, query),
+ str_or_none(show_data.get('id')),
+ show_data.get('show_name'),
+ show_data.get('show_description'))
+
+
+class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
+ IE_NAME = 'adobetv:channel'
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/channel/development',
+ 'info_dict': {
+ 'id': 'development',
+ },
+ 'playlist_mincount': 96,
+ }
+ _RESOURCE = 'show'
+
+ def _process_data(self, show_data):
+ return self.url_result(
+ show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id')))
+
+ def _real_extract(self, url):
+ language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
+ query = {
+ 'channel_urlname': channel_urlname,
+ 'language': language,
+ }
+ if category_urlname:
+ query['category_urlname'] = category_urlname
+
+ return self.playlist_result(
+ self._extract_playlist_entries(channel_urlname, query),
+ channel_urlname)
+
+
+class AdobeTVVideoIE(AdobeTVBaseIE):
+ IE_NAME = 'adobetv:video'
+ _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
+
+ _TEST = {
+ # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
+ 'url': 'https://video.tv.adobe.com/v/2456/',
+ 'md5': '43662b577c018ad707a63766462b1e87',
+ 'info_dict': {
+ 'id': '2456',
+ 'ext': 'mp4',
+ 'title': 'New experience with Acrobat DC',
+ 'description': 'New experience with Acrobat DC',
+ 'duration': 248.667,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
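+ # The player page keeps its configuration in a JS variable named
+ # `bridge`; grab everything up to the terminating semicolon and parse
+ # it as JSON.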
+ video_data = self._parse_json(self._search_regex(
+ r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
+ title = video_data['title']
+
+ formats = []
+ sources = video_data.get('sources') or []
+ for source in sources:
+ source_src = source.get('src')
+ if not source_src:
+ continue
+ formats.append({
+ 'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
+ 'format_id': '-'.join(filter(None, [source.get('format'), source.get('label')])),
+ 'height': int_or_none(source.get('height') or None),
+ 'tbr': int_or_none(source.get('bitrate') or None),
+ 'width': int_or_none(source.get('width') or None),
+ 'url': source_src,
+ })
+ self._sort_formats(formats)
+
+ # The duration varies among formats, both in the metadata and in the
+ # downloaded files, so pick the largest reported value.
+ duration = max(filter(None, [
+ float_or_none(source.get('duration'), scale=1000)
+ for source in sources]))
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('video', {}).get('poster'),
+ 'duration': duration,
+ 'subtitles': self._parse_subtitles(video_data, 'vttPath'),
+ }
diff --git a/hypervideo_dl/extractor/adultswim.py b/hypervideo_dl/extractor/adultswim.py
new file mode 100644
index 0000000..8d1d9ac
--- /dev/null
+++ b/hypervideo_dl/extractor/adultswim.py
@@ -0,0 +1,202 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .turner import TurnerBaseIE
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ mimetype2ext,
+ parse_age_limit,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+)
+
+
+class AdultSwimIE(TurnerBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<show_path>[^/?#]+)(?:/(?P<episode_path>[^/?#]+))?'
+
+ _TESTS = [{
+ 'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
+ 'info_dict': {
+ 'id': 'rQxZvXQ4ROaSOqq-or2Mow',
+ 'ext': 'mp4',
+ 'title': 'Rick and Morty - Pilot',
+ 'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
+ 'timestamp': 1543294800,
+ 'upload_date': '20181127',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
+ 'info_dict': {
+ 'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
+ 'ext': 'mp4',
+ 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+ 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.',
+ 'upload_date': '20080124',
+ 'timestamp': 1201150800,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ }, {
+ 'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
+ 'info_dict': {
+ 'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
+ 'ext': 'mp4',
+ 'title': 'Decker - Inside Decker: A New Hero',
+ 'description': 'The guys recap the conclusion of the season. They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.',
+ 'timestamp': 1469480460,
+ 'upload_date': '20160725',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'http://www.adultswim.com/videos/attack-on-titan',
+ 'info_dict': {
+ 'id': 'attack-on-titan',
+ 'title': 'Attack on Titan',
+ 'description': 'md5:41caa9416906d90711e31dc00cb7db7e',
+ },
+ 'playlist_mincount': 12,
+ }, {
+ 'url': 'http://www.adultswim.com/videos/streams/williams-stream',
+ 'info_dict': {
+ 'id': 'd8DEBj7QRfetLsRgFnGEyg',
+ 'ext': 'mp4',
+ 'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'description': 'original programming',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ }]
+
+ def _real_extract(self, url):
+ show_path, episode_path = re.match(self._VALID_URL, url).groups()
+ display_id = episode_path or show_path
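+ # The GraphQL query is built in two passes: this substitution inserts
+ # the show slug, while the escaped %%s survives as %s and later
+ # receives the field selection chosen below.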
+ query = '''query {
+ getShowBySlug(slug:"%s") {
+ %%s
+ }
+}''' % show_path
+ if episode_path:
+ query = query % '''title
+ getVideoBySlug(slug:"%s") {
+ _id
+ auth
+ description
+ duration
+ episodeNumber
+ launchDate
+ mediaID
+ seasonNumber
+ poster
+ title
+ tvRating
+ }''' % episode_path
+ else:
+ query = query % '''metaDescription
+ title
+ videos(first:1000,sort:["episode_number"]) {
+ edges {
+ node {
+ _id
+ slug
+ }
+ }
+ }'''
+ show_data = self._download_json(
+ 'https://www.adultswim.com/api/search', display_id,
+ data=json.dumps({'query': query}).encode(),
+ headers={'Content-Type': 'application/json'})['data']['getShowBySlug']
+ if episode_path:
+ video_data = show_data['getVideoBySlug']
+ video_id = video_data['_id']
+ episode_title = title = video_data['title']
+ series = show_data.get('title')
+ if series:
+ title = '%s - %s' % (series, title)
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(video_data.get('description')),
+ 'duration': float_or_none(video_data.get('duration')),
+ 'formats': [],
+ 'subtitles': {},
+ 'age_limit': parse_age_limit(video_data.get('tvRating')),
+ 'thumbnail': video_data.get('poster'),
+ 'timestamp': parse_iso8601(video_data.get('launchDate')),
+ 'series': series,
+ 'season_number': int_or_none(video_data.get('seasonNumber')),
+ 'episode': episode_title,
+ 'episode_number': int_or_none(video_data.get('episodeNumber')),
+ }
+
+ auth = video_data.get('auth')
+ media_id = video_data.get('mediaID')
+ if media_id:
+ info.update(self._extract_ngtv_info(media_id, {
+ # CDN_TOKEN_APP_ID from:
+ # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js
+ 'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE',
+ }, {
+ 'url': url,
+ 'site_name': 'AdultSwim',
+ 'auth_required': auth,
+ }))
+
+ if not auth:
+ extract_data = self._download_json(
+ 'https://www.adultswim.com/api/shows/v1/videos/' + video_id,
+ video_id, query={'fields': 'stream'}, fatal=False) or {}
+ assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or []
+ for asset in assets:
+ asset_url = asset.get('url')
+ if not asset_url:
+ continue
+ ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
+ if ext == 'm3u8':
+ info['formats'].extend(self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ elif ext == 'f4m':
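+ # HDS extraction is disabled, apparently because these manifests
+ # fail to download (note the expected f4m warnings in the tests
+ # above); the call is kept commented out for reference.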
+ continue
+ # info['formats'].extend(self._extract_f4m_formats(
+ # asset_url, video_id, f4m_id='hds', fatal=False))
+ elif ext in ('scc', 'ttml', 'vtt'):
+ info['subtitles'].setdefault('en', []).append({
+ 'url': asset_url,
+ })
+ self._sort_formats(info['formats'])
+
+ return info
+ else:
+ entries = []
+ for edge in show_data.get('videos', {}).get('edges', []):
+ video = edge.get('node') or {}
+ slug = video.get('slug')
+ if not slug:
+ continue
+ entries.append(self.url_result(
+ 'http://adultswim.com/videos/%s/%s' % (show_path, slug),
+ 'AdultSwim', video.get('_id')))
+ return self.playlist_result(
+ entries, show_path, show_data.get('title'),
+ strip_or_none(show_data.get('metaDescription')))
diff --git a/hypervideo_dl/extractor/aenetworks.py b/hypervideo_dl/extractor/aenetworks.py
new file mode 100644
index 0000000..e55c03f
--- /dev/null
+++ b/hypervideo_dl/extractor/aenetworks.py
@@ -0,0 +1,342 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .theplatform import ThePlatformIE
+from ..utils import (
+ ExtractorError,
+ GeoRestrictedError,
+ int_or_none,
+ update_url_query,
+ urlencode_postdata,
+)
+
+
+class AENetworksBaseIE(ThePlatformIE):
+ _BASE_URL_REGEX = r'''(?x)https?://
+ (?:(?:www|play|watch)\.)?
+ (?P<domain>
+ (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
+ fyi\.tv
+ )/'''
+ _THEPLATFORM_KEY = 'crazyjava'
+ _THEPLATFORM_SECRET = 's3cr3t'
+ _DOMAIN_MAP = {
+ 'history.com': ('HISTORY', 'history'),
+ 'aetv.com': ('AETV', 'aetv'),
+ 'mylifetime.com': ('LIFETIME', 'lifetime'),
+ 'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
+ 'fyi.tv': ('FYI', 'fyi'),
+ 'historyvault.com': (None, 'historyvault'),
+ 'biography.com': (None, 'biography'),
+ }
+
+ def _extract_aen_smil(self, smil_url, video_id, auth=None):
+ query = {'mbr': 'true'}
+ if auth:
+ query['auth'] = auth
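+ # Try several asset-type/CDN switch combinations in order; remember
+ # the last error and raise it only if none of them yields formats.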
+ TP_SMIL_QUERY = [{
+ 'assetTypes': 'high_video_ak',
+ 'switch': 'hls_high_ak'
+ }, {
+ 'assetTypes': 'high_video_s3'
+ }, {
+ 'assetTypes': 'high_video_s3',
+ 'switch': 'hls_high_fastly',
+ }]
+ formats = []
+ subtitles = {}
+ last_e = None
+ for q in TP_SMIL_QUERY:
+ q.update(query)
+ m_url = update_url_query(smil_url, q)
+ m_url = self._sign_url(m_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
+ try:
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
+ except ExtractorError as e:
+ if isinstance(e, GeoRestrictedError):
+ raise
+ last_e = e
+ continue
+ formats.extend(tp_formats)
+ subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+ if last_e and not formats:
+ raise last_e
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _extract_aetn_info(self, domain, filter_key, filter_value, url):
+ requestor_id, brand = self._DOMAIN_MAP[domain]
+ result = self._download_json(
+ 'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
+ filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
+ title = result['title']
+ video_id = result['id']
+ media_url = result['publicUrl']
+ theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
+ r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
+ info = self._parse_theplatform_metadata(theplatform_metadata)
+ auth = None
+ if theplatform_metadata.get('AETN$isBehindWall'):
+ resource = self._get_mvpd_resource(
+ requestor_id, theplatform_metadata['title'],
+ theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
+ theplatform_metadata['ratings'][0]['rating'])
+ auth = self._extract_mvpd_auth(
+ url, video_id, requestor_id, resource)
+ info.update(self._extract_aen_smil(media_url, video_id, auth))
+ info.update({
+ 'title': title,
+ 'series': result.get('seriesName'),
+ 'season_number': int_or_none(result.get('tvSeasonNumber')),
+ 'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
+ })
+ return info
+
+
+class AENetworksIE(AENetworksBaseIE):
+ IE_NAME = 'aenetworks'
+ IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
+ _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
+ shows/[^/]+/season-\d+/episode-\d+|
+ (?:
+ (?:movie|special)s/[^/]+|
+ (?:shows/[^/]+/)?videos
+ )/[^/?#&]+
+ )'''
+ _TESTS = [{
+ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
+ 'info_dict': {
+ 'id': '22253814',
+ 'ext': 'mp4',
+ 'title': 'Winter is Coming',
+ 'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
+ 'timestamp': 1338306241,
+ 'upload_date': '20120529',
+ 'uploader': 'AENE-NEW',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['ThePlatform'],
+ 'skip': 'This video is only available for users of participating TV providers.',
+ }, {
+ 'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
+ 'info_dict': {
+ 'id': '600587331957',
+ 'ext': 'mp4',
+ 'title': 'Inlawful Entry',
+ 'description': 'md5:57c12115a2b384d883fe64ca50529e08',
+ 'timestamp': 1452634428,
+ 'upload_date': '20160112',
+ 'uploader': 'AENE-NEW',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['ThePlatform'],
+ }, {
+ 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
+ 'only_matching': True
+ }, {
+ 'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
+ 'only_matching': True
+ }, {
+ 'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
+ 'only_matching': True
+ }, {
+ 'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
+ 'only_matching': True
+ }, {
+ 'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
+ 'only_matching': True
+ }, {
+ 'url': 'http://www.history.com/videos/history-of-valentines-day',
+ 'only_matching': True
+ }, {
+ 'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ domain, canonical = re.match(self._VALID_URL, url).groups()
+ return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
+
+
+class AENetworksListBaseIE(AENetworksBaseIE):
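+ # Collection and show listings come from a GraphQL endpoint; the
+ # query is posted urlencoded rather than as a JSON body.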
+ def _call_api(self, resource, slug, brand, fields):
+ return self._download_json(
+ 'https://yoga.appsvcs.aetnd.com/graphql',
+ slug, query={'brand': brand}, data=urlencode_postdata({
+ 'query': '''{
+ %s(slug: "%s") {
+ %s
+ }
+}''' % (resource, slug, fields),
+ }))['data'][resource]
+
+ def _real_extract(self, url):
+ domain, slug = re.match(self._VALID_URL, url).groups()
+ _, brand = self._DOMAIN_MAP[domain]
+ playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
+ base_url = 'http://watch.%s' % domain
+
+ entries = []
+ for item in (playlist.get(self._ITEMS_KEY) or []):
+ doc = self._get_doc(item)
+ canonical = doc.get('canonical')
+ if not canonical:
+ continue
+ entries.append(self.url_result(
+ base_url + canonical, AENetworksIE.ie_key(), doc.get('id')))
+
+ description = None
+ if self._PLAYLIST_DESCRIPTION_KEY:
+ description = playlist.get(self._PLAYLIST_DESCRIPTION_KEY)
+
+ return self.playlist_result(
+ entries, playlist.get('id'),
+ playlist.get(self._PLAYLIST_TITLE_KEY), description)
+
+
+class AENetworksCollectionIE(AENetworksListBaseIE):
+ IE_NAME = 'aenetworks:collection'
+ _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
+ _TESTS = [{
+ 'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
+ 'info_dict': {
+ 'id': '282',
+ 'title': 'America The Story of Us',
+ },
+ 'playlist_mincount': 12,
+ }, {
+ 'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.historyvault.com/collections/mysteryquest',
+ 'only_matching': True
+ }]
+ _RESOURCE = 'list'
+ _ITEMS_KEY = 'items'
+ _PLAYLIST_TITLE_KEY = 'display_title'
+ _PLAYLIST_DESCRIPTION_KEY = None
+ _FIELDS = '''id
+ display_title
+ items {
+ ... on ListVideoItem {
+ doc {
+ canonical
+ id
+ }
+ }
+ }'''
+
+ def _get_doc(self, item):
+ return item.get('doc') or {}
+
+
+class AENetworksShowIE(AENetworksListBaseIE):
+ IE_NAME = 'aenetworks:show'
+ _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
+ _TESTS = [{
+ 'url': 'http://www.history.com/shows/ancient-aliens',
+ 'info_dict': {
+ 'id': 'SERIES1574',
+ 'title': 'Ancient Aliens',
+ 'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
+ },
+ 'playlist_mincount': 150,
+ }]
+ _RESOURCE = 'series'
+ _ITEMS_KEY = 'episodes'
+ _PLAYLIST_TITLE_KEY = 'title'
+ _PLAYLIST_DESCRIPTION_KEY = 'description'
+ _FIELDS = '''description
+ id
+ title
+ episodes {
+ canonical
+ id
+ }'''
+
+ def _get_doc(self, item):
+ return item
+
+
+class HistoryTopicIE(AENetworksBaseIE):
+ IE_NAME = 'history:topic'
+ IE_DESC = 'History.com Topic'
+ _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
+ _TESTS = [{
+ 'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
+ 'info_dict': {
+ 'id': '40700995724',
+ 'ext': 'mp4',
+ 'title': "History of Valentine’s Day",
+ 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
+ 'timestamp': 1375819729,
+ 'upload_date': '20130806',
+ 'uploader': 'AENE-NEW',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['ThePlatform'],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ return self.url_result(
+ 'http://www.history.com/videos/' + display_id,
+ AENetworksIE.ie_key())
+
+
+class HistoryPlayerIE(AENetworksBaseIE):
+ IE_NAME = 'history:player'
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
+ _TESTS = []
+
+ def _real_extract(self, url):
+ domain, video_id = re.match(self._VALID_URL, url).groups()
+ return self._extract_aetn_info(domain, 'id', video_id, url)
+
+
+class BiographyIE(AENetworksBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
+ 'info_dict': {
+ 'id': '30322987',
+ 'ext': 'mp4',
+ 'title': 'Vincent Van Gogh - Full Episode',
+ 'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
+ 'timestamp': 1311970571,
+ 'upload_date': '20110729',
+ 'uploader': 'AENE-NEW',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['ThePlatform'],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ player_url = self._search_regex(
+ r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
+ webpage, 'player URL')
+ return self.url_result(player_url, HistoryPlayerIE.ie_key())
diff --git a/hypervideo_dl/extractor/afreecatv.py b/hypervideo_dl/extractor/afreecatv.py
new file mode 100644
index 0000000..b56abb1
--- /dev/null
+++ b/hypervideo_dl/extractor/afreecatv.py
@@ -0,0 +1,367 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_xpath
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ url_or_none,
+ urlencode_postdata,
+ xpath_text,
+)
+
+
+class AfreecaTVIE(InfoExtractor):
+ IE_NAME = 'afreecatv'
+ IE_DESC = 'afreecatv.com'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
+ (?:
+ /app/(?:index|read_ucc_bbs)\.cgi|
+ /player/[Pp]layer\.(?:swf|html)
+ )\?.*?\bnTitleNo=|
+ vod\.afreecatv\.com/PLAYER/STATION/
+ )
+ (?P<id>\d+)
+ '''
+ _NETRC_MACHINE = 'afreecatv'
+ _TESTS = [{
+ 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
+ 'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
+ 'info_dict': {
+ 'id': '36164052',
+ 'ext': 'mp4',
+ 'title': '데일리 에이프릴 요정들의 시상식!',
+ 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+ 'uploader': 'dailyapril',
+ 'uploader_id': 'dailyapril',
+ 'upload_date': '20160503',
+ },
+ 'skip': 'Video is gone',
+ }, {
+ 'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
+ 'info_dict': {
+ 'id': '36153164',
+ 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
+ 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+ 'uploader': 'dailyapril',
+ 'uploader_id': 'dailyapril',
+ },
+ 'playlist_count': 2,
+ 'playlist': [{
+ 'md5': 'd8b7c174568da61d774ef0203159bf97',
+ 'info_dict': {
+ 'id': '36153164_1',
+ 'ext': 'mp4',
+ 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
+ 'upload_date': '20160502',
+ },
+ }, {
+ 'md5': '58f2ce7f6044e34439ab2d50612ab02b',
+ 'info_dict': {
+ 'id': '36153164_2',
+ 'ext': 'mp4',
+ 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
+ 'upload_date': '20160502',
+ },
+ }],
+ 'skip': 'Video is gone',
+ }, {
+ 'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
+ 'info_dict': {
+ 'id': '18650793',
+ 'ext': 'mp4',
+ 'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': '윈아디',
+ 'uploader_id': 'badkids',
+ 'duration': 107,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
+ 'info_dict': {
+ 'id': '10481652',
+ 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
+ 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+ 'uploader': 'dailyapril',
+ 'uploader_id': 'dailyapril',
+ 'duration': 6492,
+ },
+ 'playlist_count': 2,
+ 'playlist': [{
+ 'md5': 'd8b7c174568da61d774ef0203159bf97',
+ 'info_dict': {
+ 'id': '20160502_c4c62b9d_174361386_1',
+ 'ext': 'mp4',
+ 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
+ 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+ 'uploader': 'dailyapril',
+ 'uploader_id': 'dailyapril',
+ 'upload_date': '20160502',
+ 'duration': 3601,
+ },
+ }, {
+ 'md5': '58f2ce7f6044e34439ab2d50612ab02b',
+ 'info_dict': {
+ 'id': '20160502_39e739bb_174361386_2',
+ 'ext': 'mp4',
+ 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
+ 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+ 'uploader': 'dailyapril',
+ 'uploader_id': 'dailyapril',
+ 'upload_date': '20160502',
+ 'duration': 2891,
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # non-standard key
+ 'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
+ 'info_dict': {
+ 'id': '20170411_BE689A0E_190960999_1_2_h',
+ 'ext': 'mp4',
+ 'title': '혼자사는여자집',
+ 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+ 'uploader': '♥이슬이',
+ 'uploader_id': 'dasl8121',
+ 'upload_date': '20170411',
+ 'duration': 213,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # PARTIAL_ADULT
+ 'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
+ 'info_dict': {
+ 'id': '20180327_27901457_202289533_1',
+ 'ext': 'mp4',
+ 'title': '[생]빨개요♥ (part 1)',
+ 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+ 'uploader': '[SA]서아',
+ 'uploader_id': 'bjdyrksu',
+ 'upload_date': '20180327',
+ 'duration': 3601,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['adult content'],
+ }, {
+ 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
+ 'only_matching': True,
+ }]
+
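+ # VOD part keys look like YYYYMMDD_<hex>_<part>, e.g.
+ # 20160502_c4c62b9d_174361386_1 (cf. the tests above); recover the
+ # upload date and part number from them.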
+ @staticmethod
+ def parse_video_key(key):
+ video_key = {}
+ m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
+ if m:
+ video_key['upload_date'] = m.group('upload_date')
+ video_key['part'] = int(m.group('part'))
+ return video_key
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_form = {
+ 'szWork': 'login',
+ 'szType': 'json',
+ 'szUid': username,
+ 'szPassword': password,
+ 'isSaveId': 'false',
+ 'szScriptVar': 'oLoginRet',
+ 'szAction': '',
+ }
+
+ response = self._download_json(
+ 'https://login.afreecatv.com/app/LoginAction.php', None,
+ 'Logging in', data=urlencode_postdata(login_form))
+
+ _ERRORS = {
+ -4: 'Your account has been suspended due to a violation of our terms and policies.',
+ -5: 'https://member.afreecatv.com/app/user_delete_progress.php',
+ -6: 'https://login.afreecatv.com/membership/changeMember.php',
+ -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
+ -9: 'https://member.afreecatv.com/app/pop_login_block.php',
+ -11: 'https://login.afreecatv.com/afreeca/second_login.php',
+ -12: 'https://member.afreecatv.com/app/user_security.php',
+ 0: 'The username does not exist or you have entered the wrong password.',
+ -1: 'The username does not exist or you have entered the wrong password.',
+ -3: 'You have entered your username/password incorrectly.',
+ -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
+ -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
+ -32008: 'You have failed to log in. Please contact our Help Center.',
+ }
+
+ result = int_or_none(response.get('RESULT'))
+ if result != 1:
+ error = _ERRORS.get(result, 'You have failed to log in.')
+ raise ExtractorError(
+ 'Unable to login: %s said: %s' % (self.IE_NAME, error),
+ expected=True)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ if re.search(r'alert\(["\']This video has been deleted', webpage):
+ raise ExtractorError(
+ 'Video %s has been deleted' % video_id, expected=True)
+
+ station_id = self._search_regex(
+ r'nStationNo\s*=\s*(\d+)', webpage, 'station')
+ bbs_id = self._search_regex(
+ r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
+ video_id = self._search_regex(
+ r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
+
+ partial_view = False
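+ # First pass asks for the full video; if the API answers
+ # PARTIAL_ADULT, retry once with partialView=SKIP_ADULT. The for/else
+ # only fires if neither pass breaks out with a usable response.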
+ for _ in range(2):
+ query = {
+ 'nTitleNo': video_id,
+ 'nStationNo': station_id,
+ 'nBbsNo': bbs_id,
+ }
+ if partial_view:
+ query['partialView'] = 'SKIP_ADULT'
+ video_xml = self._download_xml(
+ 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
+ video_id, 'Downloading video info XML%s'
+ % (' (skipping adult)' if partial_view else ''),
+ video_id, headers={
+ 'Referer': url,
+ }, query=query)
+
+ flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
+ if flag and flag == 'SUCCEED':
+ break
+ if flag == 'PARTIAL_ADULT':
+ self._downloader.report_warning(
+ 'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
+ 'Only content suitable for all ages will be downloaded. '
+ 'Provide account credentials if you wish to download restricted content.')
+ partial_view = True
+ continue
+ elif flag == 'ADULT':
+ error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
+ else:
+ error = flag
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error), expected=True)
+ else:
+ raise ExtractorError('Unable to download video info')
+
+ video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
+ if video_element is None or video_element.text is None:
+ raise ExtractorError(
+ 'Video %s does not exist' % video_id, expected=True)
+
+ video_url = video_element.text.strip()
+
+ title = xpath_text(video_xml, './track/title', 'title', fatal=True)
+
+ uploader = xpath_text(video_xml, './track/nickname', 'uploader')
+ uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
+ duration = int_or_none(xpath_text(
+ video_xml, './track/duration', 'duration'))
+ thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
+
+ common_entry = {
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'thumbnail': thumbnail,
+ }
+
+ info = common_entry.copy()
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'duration': duration,
+ })
+
+ if not video_url:
+ entries = []
+ file_elements = video_element.findall(compat_xpath('./file'))
+ one = len(file_elements) == 1
+ for file_num, file_element in enumerate(file_elements, start=1):
+ file_url = url_or_none(file_element.text)
+ if not file_url:
+ continue
+ key = file_element.get('key', '')
+ upload_date = self._search_regex(
+ r'^(\d{8})_', key, 'upload date', default=None)
+ file_duration = int_or_none(file_element.get('duration'))
+                format_id = key or '%s_%s' % (video_id, file_num)
+ if determine_ext(file_url) == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ file_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls',
+ note='Downloading part %d m3u8 information' % file_num)
+ else:
+ formats = [{
+ 'url': file_url,
+ 'format_id': 'http',
+ }]
+ if not formats:
+ continue
+ self._sort_formats(formats)
+ file_info = common_entry.copy()
+ file_info.update({
+ 'id': format_id,
+ 'title': title if one else '%s (part %d)' % (title, file_num),
+ 'upload_date': upload_date,
+ 'duration': file_duration,
+ 'formats': formats,
+ })
+ entries.append(file_info)
+ entries_info = info.copy()
+ entries_info.update({
+ '_type': 'multi_video',
+ 'entries': entries,
+ })
+ return entries_info
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ }
+
+ if determine_ext(video_url) == 'm3u8':
+ info['formats'] = self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ else:
+ app, playpath = video_url.split('mp4:')
+ info.update({
+ 'url': app,
+ 'ext': 'flv',
+ 'play_path': 'mp4:' + playpath,
+ 'rtmp_live': True, # downloading won't end without this
+ })
+
+ return info
diff --git a/hypervideo_dl/extractor/airmozilla.py b/hypervideo_dl/extractor/airmozilla.py
new file mode 100644
index 0000000..9e38136
--- /dev/null
+++ b/hypervideo_dl/extractor/airmozilla.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class AirMozillaIE(InfoExtractor):
+ _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
+ _TEST = {
+ 'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
+ 'md5': '8d02f53ee39cf006009180e21df1f3ba',
+ 'info_dict': {
+ 'id': '6x4q2w',
+ 'ext': 'mp4',
+ 'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
+ 'thumbnail': r're:https?://.*/poster\.jpg',
+ 'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
+ 'timestamp': 1422487800,
+ 'upload_date': '20150128',
+ 'location': 'SFO Commons',
+ 'duration': 3780,
+ 'view_count': int,
+ 'categories': ['Main', 'Privacy'],
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
+
+ embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
+ jwconfig = self._parse_json(self._search_regex(
+ r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
+
+ info_dict = self._parse_jwplayer_data(jwconfig, video_id)
+ view_count = int_or_none(self._html_search_regex(
+ r'Views since archived: ([0-9]+)',
+ webpage, 'view count', fatal=False))
+ timestamp = parse_iso8601(self._html_search_regex(
+ r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
+ duration = parse_duration(self._search_regex(
+ r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
+ webpage, 'duration', fatal=False))
+
+ info_dict.update({
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'url': self._og_search_url(webpage),
+ 'display_id': display_id,
+ 'description': self._og_search_description(webpage),
+ 'timestamp': timestamp,
+ 'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
+ 'duration': duration,
+ 'view_count': view_count,
+ 'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
+ })
+
+ return info_dict
diff --git a/hypervideo_dl/extractor/aliexpress.py b/hypervideo_dl/extractor/aliexpress.py
new file mode 100644
index 0000000..6f241e6
--- /dev/null
+++ b/hypervideo_dl/extractor/aliexpress.py
@@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ try_get,
+)
+
+
+class AliExpressLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://live.aliexpress.com/live/2800002704436634',
+ 'md5': 'e729e25d47c5e557f2630eaf99b740a5',
+ 'info_dict': {
+ 'id': '2800002704436634',
+ 'ext': 'mp4',
+ 'title': 'CASIMA7.22',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'uploader': 'CASIMA Official Store',
+ 'timestamp': 1500717600,
+ 'upload_date': '20170722',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._parse_json(
+ self._search_regex(
+ r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var',
+ webpage, 'runParams'),
+ video_id)
+
+ title = data['title']
+
+ formats = self._extract_m3u8_formats(
+ data['replyStreamUrl'], video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+
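+        # startTimeLong is a millisecond epoch timestamp; scale=1000 converts
+        # it to seconds.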
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': data.get('coverUrl'),
+ 'uploader': try_get(
+ data, lambda x: x['followBar']['name'], compat_str),
+ 'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/aljazeera.py b/hypervideo_dl/extractor/aljazeera.py
new file mode 100644
index 0000000..c4f915a
--- /dev/null
+++ b/hypervideo_dl/extractor/aljazeera.py
@@ -0,0 +1,56 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class AlJazeeraIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?P<type>program/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
+ 'info_dict': {
+ 'id': '3792260579001',
+ 'ext': 'mp4',
+ 'title': 'The Slum - Episode 1: Deliverance',
+ 'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
+ 'uploader_id': '665003303001',
+ 'timestamp': 1411116829,
+ 'upload_date': '20140919',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ 'skip': 'Not accessible from Travis CI server',
+ }, {
+ 'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
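+    # e.g. ('665003303001', 'BkeSH5BDb', '3792260579001') expands to
+    # http://players.brightcove.net/665003303001/BkeSH5BDb_default/index.html?videoId=3792260579001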
+
+ def _real_extract(self, url):
+ post_type, name = re.match(self._VALID_URL, url).groups()
+ post_type = {
+ 'features': 'post',
+ 'program': 'episode',
+ 'videos': 'video',
+ }[post_type.split('/')[0]]
+ video = self._download_json(
+ 'https://www.aljazeera.com/graphql', name, query={
+ 'operationName': 'SingleArticleQuery',
+ 'variables': json.dumps({
+ 'name': name,
+ 'postType': post_type,
+ }),
+ }, headers={
+ 'wp-site': 'aje',
+ })['data']['article']['video']
+ video_id = video['id']
+ account_id = video.get('accountId') or '665003303001'
+ player_id = video.get('playerId') or 'BkeSH5BDb'
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
+ 'BrightcoveNew', video_id)
diff --git a/hypervideo_dl/extractor/allocine.py b/hypervideo_dl/extractor/allocine.py
new file mode 100644
index 0000000..cd533ac
--- /dev/null
+++ b/hypervideo_dl/extractor/allocine.py
@@ -0,0 +1,132 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ qualities,
+ remove_end,
+ try_get,
+ unified_timestamp,
+ url_basename,
+)
+
+
+class AllocineIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
+
+ _TESTS = [{
+ 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
+ 'md5': '0c9fcf59a841f65635fa300ac43d8269',
+ 'info_dict': {
+ 'id': '19546517',
+ 'display_id': '18635087',
+ 'ext': 'mp4',
+ 'title': 'Astérix - Le Domaine des Dieux Teaser VF',
+ 'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'duration': 39,
+ 'timestamp': 1404273600,
+ 'upload_date': '20140702',
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
+ 'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
+ 'info_dict': {
+ 'id': '19540403',
+ 'display_id': '19540403',
+ 'ext': 'mp4',
+ 'title': 'Planes 2 Bande-annonce VF',
+ 'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'duration': 69,
+ 'timestamp': 1385659800,
+ 'upload_date': '20131128',
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
+ 'md5': '101250fb127ef9ca3d73186ff22a47ce',
+ 'info_dict': {
+ 'id': '19544709',
+ 'display_id': '19544709',
+ 'ext': 'mp4',
+ 'title': 'Dragons 2 - Bande annonce finale VF',
+ 'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'duration': 144,
+ 'timestamp': 1397589900,
+ 'upload_date': '20140415',
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'http://www.allocine.fr/video/video-19550147/',
+ 'md5': '3566c0668c0235e2d224fd8edb389f67',
+ 'info_dict': {
+ 'id': '19550147',
+ 'ext': 'mp4',
+ 'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger',
+ 'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354',
+ 'thumbnail': r're:http://.*\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ formats = []
+ quality = qualities(['ld', 'md', 'hd'])
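+        # qualities() ranks later entries higher, so format preference is
+        # ld < md < hd.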
+
+ model = self._html_search_regex(
+ r'data-model="([^"]+)"', webpage, 'data model', default=None)
+ if model:
+ model_data = self._parse_json(model, display_id)
+ video = model_data['videos'][0]
+ title = video['title']
+ for video_url in video['sources'].values():
+ video_id, format_id = url_basename(video_url).split('_')[:2]
+ formats.append({
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ 'url': video_url,
+ })
+ duration = int_or_none(video.get('duration'))
+ view_count = int_or_none(video.get('view_count'))
+ timestamp = unified_timestamp(try_get(
+ video, lambda x: x['added_at']['date'], compat_str))
+ else:
+ video_id = display_id
+ media_data = self._download_json(
+ 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
+ title = remove_end(
+ self._html_search_regex(
+ r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
+ ' - AlloCiné')
+ for key, value in media_data['video'].items():
+ if not key.endswith('Path'):
+ continue
+ format_id = key[:-len('Path')]
+ formats.append({
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ 'url': value,
+ })
+ duration, view_count, timestamp = [None] * 3
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/alphaporno.py b/hypervideo_dl/extractor/alphaporno.py
new file mode 100644
index 0000000..3a6d99f
--- /dev/null
+++ b/hypervideo_dl/extractor/alphaporno.py
@@ -0,0 +1,77 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_iso8601,
+ parse_duration,
+ parse_filesize,
+ int_or_none,
+)
+
+
+class AlphaPornoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
+ 'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
+ 'info_dict': {
+ 'id': '258807',
+ 'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
+ 'ext': 'mp4',
+ 'title': 'Sensual striptease porn with Samantha Alexandra',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'timestamp': 1418694611,
+ 'upload_date': '20141216',
+ 'duration': 387,
+ 'filesize_approx': 54120000,
+ 'tbr': 1145,
+ 'categories': list,
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None)
+
+ video_url = self._search_regex(
+ r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
+ ext = self._html_search_meta(
+ 'encodingFormat', webpage, 'ext', default='.mp4')[1:]
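+        # encodingFormat is given with a leading dot (e.g. '.mp4'), which the
+        # slice strips off.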
+
+ title = self._search_regex(
+ [r'<meta content="([^"]+)" itemprop="description">',
+ r'class="title" itemprop="name">([^<]+)<'],
+ webpage, 'title')
+ thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'uploadDate', webpage, 'upload date'))
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration'))
+ filesize_approx = parse_filesize(self._html_search_meta(
+ 'contentSize', webpage, 'file size'))
+ bitrate = int_or_none(self._html_search_meta(
+ 'bitrate', webpage, 'bitrate'))
+ categories = self._html_search_meta(
+ 'keywords', webpage, 'categories', default='').split(',')
+
+ age_limit = self._rta_search(webpage)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'ext': ext,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'filesize_approx': filesize_approx,
+ 'tbr': bitrate,
+ 'categories': categories,
+ 'age_limit': age_limit,
+ }
diff --git a/hypervideo_dl/extractor/amara.py b/hypervideo_dl/extractor/amara.py
new file mode 100644
index 0000000..61d4695
--- /dev/null
+++ b/hypervideo_dl/extractor/amara.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from .vimeo import VimeoIE
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ update_url_query,
+)
+
+
+class AmaraIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
+ _TESTS = [{
+ # Youtube
+ 'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
+ 'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
+ 'info_dict': {
+ 'id': 'h6ZuVdvYnfE',
+ 'ext': 'mp4',
+ 'title': 'Why jury trials are becoming less common',
+ 'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'subtitles': dict,
+ 'upload_date': '20160813',
+ 'uploader': 'PBS NewsHour',
+ 'uploader_id': 'PBSNewsHour',
+ 'timestamp': 1549639570,
+ }
+ }, {
+ # Vimeo
+ 'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
+ 'md5': '99392c75fa05d432a8f11df03612195e',
+ 'info_dict': {
+ 'id': '18622084',
+ 'ext': 'mov',
+ 'title': 'Vimeo at CES 2011!',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'subtitles': dict,
+ 'timestamp': 1294763658,
+ 'upload_date': '20110111',
+ 'uploader': 'Sam Morrill',
+ 'uploader_id': 'sammorrill'
+ }
+ }, {
+ # Direct Link
+ 'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
+ 'md5': 'd3970f08512738ee60c5807311ff5d3f',
+ 'info_dict': {
+ 'id': 's8KL7I3jLmh6',
+ 'ext': 'mp4',
+ 'title': 'The danger of a single story',
+ 'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'subtitles': dict,
+ 'upload_date': '20091007',
+ 'timestamp': 1254942511,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ meta = self._download_json(
+ 'https://amara.org/api/videos/%s/' % video_id,
+ video_id, query={'format': 'json'})
+ title = meta['title']
+ video_url = meta['all_urls'][0]
+
+ subtitles = {}
+ for language in (meta.get('languages') or []):
+ subtitles_uri = language.get('subtitles_uri')
+ if not (subtitles_uri and language.get('published')):
+ continue
+ subtitle = subtitles.setdefault(language.get('code') or 'en', [])
+ for f in ('json', 'srt', 'vtt'):
+ subtitle.append({
+ 'ext': f,
+ 'url': update_url_query(subtitles_uri, {'format': f}),
+ })
+
+ info = {
+ 'url': video_url,
+ 'id': video_id,
+ 'subtitles': subtitles,
+ 'title': title,
+ 'description': meta.get('description'),
+ 'thumbnail': meta.get('thumbnail'),
+ 'duration': int_or_none(meta.get('duration')),
+ 'timestamp': parse_iso8601(meta.get('created')),
+ }
+
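+        # If the canonical URL belongs to YouTube or Vimeo, delegate to that
+        # extractor while keeping the metadata collected above.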
+ for ie in (YoutubeIE, VimeoIE):
+ if ie.suitable(video_url):
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': ie.ie_key(),
+ })
+ break
+
+ return info
diff --git a/hypervideo_dl/extractor/amcnetworks.py b/hypervideo_dl/extractor/amcnetworks.py
new file mode 100644
index 0000000..b8027bb
--- /dev/null
+++ b/hypervideo_dl/extractor/amcnetworks.py
@@ -0,0 +1,119 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .theplatform import ThePlatformIE
+from ..utils import (
+ int_or_none,
+ parse_age_limit,
+ try_get,
+ update_url_query,
+)
+
+
+class AMCNetworksIE(ThePlatformIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
+ 'info_dict': {
+ 'id': '4Lq1dzOnZGt0',
+ 'ext': 'mp4',
+ 'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
+ 'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
+ 'upload_date': '20201120',
+ 'timestamp': 1605904350,
+ 'uploader': 'AMCN',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ifc.com/movies/chaos',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.wetv.com/shows/mama-june-from-not-to-hot/full-episode/season-01/thin-tervention',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
+ 'only_matching': True,
+ }]
+ _REQUESTOR_ID_MAP = {
+ 'amc': 'AMC',
+ 'bbcamerica': 'BBCA',
+ 'ifc': 'IFC',
+ 'sundancetv': 'SUNDANCE',
+ 'wetv': 'WETV',
+ }
+
+ def _real_extract(self, url):
+ site, display_id = re.match(self._VALID_URL, url).groups()
+ requestor_id = self._REQUESTOR_ID_MAP[site]
+ properties = self._download_json(
+ 'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' % (requestor_id.lower(), display_id),
+ display_id)['data']['properties']
+ query = {
+ 'mbr': 'true',
+ 'manifest': 'm3u',
+ }
+ tp_path = 'M_UwQC/media/' + properties['videoPid']
+ media_url = 'https://link.theplatform.com/s/' + tp_path
+ theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
+ info = self._parse_theplatform_metadata(theplatform_metadata)
+ video_id = theplatform_metadata['pid']
+ title = theplatform_metadata['title']
+ rating = try_get(
+ theplatform_metadata, lambda x: x['ratings'][0]['rating'])
+ video_category = properties.get('videoCategory')
+ if video_category and video_category.endswith('-Auth'):
+ resource = self._get_mvpd_resource(
+ requestor_id, title, video_id, rating)
+ query['auth'] = self._extract_mvpd_auth(
+ url, video_id, requestor_id, resource)
+ media_url = update_url_query(media_url, query)
+ formats, subtitles = self._extract_theplatform_smil(
+ media_url, video_id)
+ self._sort_formats(formats)
+ info.update({
+ 'id': video_id,
+ 'subtitles': subtitles,
+ 'formats': formats,
+        'age_limit': parse_age_limit(rating),
+ })
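+        # ThePlatform metadata fields are namespaced (e.g. '<ns>$show'); look
+        # up series/season/episode under the first declared namespace.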
+ ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
+ if ns_keys:
+ ns = list(ns_keys)[0]
+ series = theplatform_metadata.get(ns + '$show')
+ season_number = int_or_none(
+ theplatform_metadata.get(ns + '$season'))
+ episode = theplatform_metadata.get(ns + '$episodeTitle')
+ episode_number = int_or_none(
+ theplatform_metadata.get(ns + '$episode'))
+ if season_number:
+ title = 'Season %d - %s' % (season_number, title)
+ if series:
+ title = '%s - %s' % (series, title)
+ info.update({
+ 'title': title,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ })
+ return info
diff --git a/hypervideo_dl/extractor/americastestkitchen.py b/hypervideo_dl/extractor/americastestkitchen.py
new file mode 100644
index 0000000..be960c0
--- /dev/null
+++ b/hypervideo_dl/extractor/americastestkitchen.py
@@ -0,0 +1,159 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ try_get,
+ unified_strdate,
+ unified_timestamp,
+)
+
+
+class AmericasTestKitchenIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
+ 'md5': 'b861c3e365ac38ad319cfd509c30577f',
+ 'info_dict': {
+ 'id': '5b400b9ee338f922cb06450c',
+ 'title': 'Japanese Suppers',
+ 'ext': 'mp4',
+ 'description': 'md5:64e606bfee910627efc4b5f050de92b3',
+ 'thumbnail': r're:^https?://',
+ 'timestamp': 1523318400,
+ 'upload_date': '20180410',
+ 'release_date': '20180410',
+ 'series': "America's Test Kitchen",
+ 'season_number': 18,
+ 'episode': 'Japanese Suppers',
+ 'episode_number': 15,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
+ 'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
+ 'md5': '06451608c57651e985a498e69cec17e5',
+ 'info_dict': {
+ 'id': '5fbe8c61bda2010001c6763b',
+ 'title': 'Simple Chicken Dinner',
+ 'ext': 'mp4',
+ 'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
+ 'thumbnail': r're:^https?://',
+ 'timestamp': 1610755200,
+ 'upload_date': '20210116',
+ 'release_date': '20210116',
+ 'series': "America's Test Kitchen",
+ 'season_number': 21,
+ 'episode': 'Simple Chicken Dinner',
+ 'episode_number': 3,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ resource_type, video_id = re.match(self._VALID_URL, url).groups()
+ is_episode = resource_type == 'episode'
+ if is_episode:
+ resource_type = 'episodes'
+
+ resource = self._download_json(
+ 'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
+ video = resource['video'] if is_episode else resource
+ episode = resource if is_episode else resource.get('episode') or {}
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
+ 'ie_key': 'Zype',
+ 'description': clean_html(video.get('description')),
+ 'timestamp': unified_timestamp(video.get('publishDate')),
+ 'release_date': unified_strdate(video.get('publishDate')),
+ 'episode_number': int_or_none(episode.get('number')),
+ 'season_number': int_or_none(episode.get('season')),
+ 'series': try_get(episode, lambda x: x['show']['title']),
+ 'episode': episode.get('title'),
+ }
+
+
+class AmericasTestKitchenSeasonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
+ _TESTS = [{
+ # ATK Season
+ 'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
+ 'info_dict': {
+ 'id': 'season_1',
+ 'title': 'Season 1',
+ },
+ 'playlist_count': 13,
+ }, {
+ # Cooks Country Season
+ 'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+ 'info_dict': {
+ 'id': 'season_12',
+ 'title': 'Season 12',
+ },
+ 'playlist_count': 13,
+ }]
+
+ def _real_extract(self, url):
+ show_name, season_number = re.match(self._VALID_URL, url).groups()
+ season_number = int(season_number)
+
+ slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
+
+ season = 'Season %d' % season_number
+
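+        # Episodes are enumerated through the site's public Algolia search
+        # index, filtered to episode documents of this show and season.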
+ season_search = self._download_json(
+ 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
+ season, headers={
+ 'Origin': 'https://www.%s.com' % show_name,
+ 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
+ 'X-Algolia-Application-Id': 'Y1FNZXUI30',
+ }, query={
+ 'facetFilters': json.dumps([
+ 'search_season_list:' + season,
+ 'search_document_klass:episode',
+ 'search_show_slug:' + slug,
+ ]),
+ 'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
+ 'attributesToHighlight': '',
+ 'hitsPerPage': 1000,
+ })
+
+ def entries():
+ for episode in (season_search.get('hits') or []):
+ search_url = episode.get('search_url')
+ if not search_url:
+ continue
+ yield {
+ '_type': 'url',
+ 'url': 'https://www.%s.com%s' % (show_name, search_url),
+ 'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
+ 'title': episode.get('title'),
+ 'description': episode.get('description'),
+ 'timestamp': unified_timestamp(episode.get('search_document_date')),
+ 'season_number': season_number,
+ 'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
+ 'ie_key': AmericasTestKitchenIE.ie_key(),
+ }
+
+ return self.playlist_result(
+ entries(), 'season_%d' % season_number, season)
diff --git a/hypervideo_dl/extractor/amp.py b/hypervideo_dl/extractor/amp.py
new file mode 100644
index 0000000..24c684c
--- /dev/null
+++ b/hypervideo_dl/extractor/amp.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ mimetype2ext,
+ parse_iso8601,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class AMPIE(InfoExtractor):
+ # parse Akamai Adaptive Media Player feed
+ def _extract_feed_info(self, url):
+ feed = self._download_json(
+ url, None, 'Downloading Akamai AMP feed',
+ 'Unable to download Akamai AMP feed')
+ item = feed.get('channel', {}).get('item')
+ if not item:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
+
+ video_id = item['guid']
+
+ def get_media_node(name, default=None):
+ media_name = 'media-%s' % name
+ media_group = item.get('media-group') or item
+ return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
+
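+        # e.g. get_media_node('title') checks media-group/media-title, then
+        # item['media-title'], then plain item['title'].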
+ thumbnails = []
+ media_thumbnail = get_media_node('thumbnail')
+ if media_thumbnail:
+ if isinstance(media_thumbnail, dict):
+ media_thumbnail = [media_thumbnail]
+ for thumbnail_data in media_thumbnail:
+ thumbnail = thumbnail_data.get('@attributes', {})
+ thumbnail_url = url_or_none(thumbnail.get('url'))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': self._proto_relative_url(thumbnail_url, 'http:'),
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ subtitles = {}
+ media_subtitle = get_media_node('subTitle')
+ if media_subtitle:
+ if isinstance(media_subtitle, dict):
+ media_subtitle = [media_subtitle]
+ for subtitle_data in media_subtitle:
+ subtitle = subtitle_data.get('@attributes', {})
+ subtitle_href = url_or_none(subtitle.get('href'))
+ if not subtitle_href:
+ continue
+ subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
+ 'url': subtitle_href,
+ 'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),
+ })
+
+ formats = []
+ media_content = get_media_node('content')
+ if isinstance(media_content, dict):
+ media_content = [media_content]
+ for media_data in media_content:
+ media = media_data.get('@attributes', {})
+ media_url = url_or_none(media.get('url'))
+ if not media_url:
+ continue
+ ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
+ video_id, f4m_id='hds', fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
+ 'url': media_url,
+ 'tbr': int_or_none(media.get('bitrate')),
+ 'filesize': int_or_none(media.get('fileSize')),
+ 'ext': ext,
+ })
+
+ self._sort_formats(formats)
+
+ timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
+
+ return {
+ 'id': video_id,
+ 'title': get_media_node('title'),
+ 'description': get_media_node('description'),
+ 'thumbnails': thumbnails,
+ 'timestamp': timestamp,
+ 'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/animeondemand.py b/hypervideo_dl/extractor/animeondemand.py
new file mode 100644
index 0000000..54e097d
--- /dev/null
+++ b/hypervideo_dl/extractor/animeondemand.py
@@ -0,0 +1,299 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ ExtractorError,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class AnimeOnDemandIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?anime-on-demand\.de/anime/(?P<id>\d+)'
+ _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
+ _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
+ _NETRC_MACHINE = 'animeondemand'
+ # German-speaking countries of Europe
+ _GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
+ _TESTS = [{
+ # jap, OmU
+ 'url': 'https://www.anime-on-demand.de/anime/161',
+ 'info_dict': {
+ 'id': '161',
+ 'title': 'Grimgar, Ashes and Illusions (OmU)',
+ 'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ # Film wording is used instead of Episode, ger/jap, Dub/OmU
+ 'url': 'https://www.anime-on-demand.de/anime/39',
+ 'only_matching': True,
+ }, {
+ # Episodes without titles, jap, OmU
+ 'url': 'https://www.anime-on-demand.de/anime/162',
+ 'only_matching': True,
+ }, {
+ # ger/jap, Dub/OmU, account required
+ 'url': 'https://www.anime-on-demand.de/anime/169',
+ 'only_matching': True,
+ }, {
+ # Full length film, non-series, ger/jap, Dub/OmU, account required
+ 'url': 'https://www.anime-on-demand.de/anime/185',
+ 'only_matching': True,
+ }, {
+ # Flash videos
+ 'url': 'https://www.anime-on-demand.de/anime/12',
+ 'only_matching': True,
+ }]
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page:
+ self.raise_geo_restricted(
+ '%s is only available in German-speaking countries of Europe' % self.IE_NAME)
+
+ login_form = self._form_hidden_inputs('new_user', login_page)
+
+ login_form.update({
+ 'user[login]': username,
+ 'user[password]': password,
+ })
+
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+ 'post url', default=self._LOGIN_URL, group='url')
+
+ if not post_url.startswith('http'):
+ post_url = urljoin(self._LOGIN_URL, post_url)
+
+ response = self._download_webpage(
+ post_url, None, 'Logging in',
+ data=urlencode_postdata(login_form), headers={
+ 'Referer': self._LOGIN_URL,
+ })
+
+ if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
+ error = self._search_regex(
+ r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>',
+ response, 'error', default=None, group='error')
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ anime_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, anime_id)
+
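+        # Pages without 'data-playlist=' attributes require opting in to the
+        # site's HTML5 beta first, after which the page is re-fetched.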
+ if 'data-playlist=' not in webpage:
+ self._download_webpage(
+ self._APPLY_HTML5_URL, anime_id,
+ 'Activating HTML5 beta', 'Unable to apply HTML5 beta')
+ webpage = self._download_webpage(url, anime_id)
+
+ csrf_token = self._html_search_meta(
+ 'csrf-token', webpage, 'csrf token', fatal=True)
+
+ anime_title = self._html_search_regex(
+ r'(?s)<h1[^>]+itemprop="name"[^>]*>(.+?)</h1>',
+ webpage, 'anime name')
+ anime_description = self._html_search_regex(
+ r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
+ webpage, 'anime description', default=None)
+
+ def extract_info(html, video_id, num=None):
+ title, description = [None] * 2
+ formats = []
+
+ for input_ in re.findall(
+ r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
+ attributes = extract_attributes(input_)
+ title = attributes.get('data-dialog-header')
+ playlist_urls = []
+ for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
+ playlist_url = attributes.get(playlist_key)
+ if isinstance(playlist_url, compat_str) and re.match(
+ r'/?[\da-zA-Z]+', playlist_url):
+ playlist_urls.append(attributes[playlist_key])
+ if not playlist_urls:
+ continue
+
+ lang = attributes.get('data-lang')
+ lang_note = attributes.get('value')
+
+ for playlist_url in playlist_urls:
+ kind = self._search_regex(
+ r'videomaterialurl/\d+/([^/]+)/',
+ playlist_url, 'media kind', default=None)
+ format_id_list = []
+ if lang:
+ format_id_list.append(lang)
+ if kind:
+ format_id_list.append(kind)
+ if not format_id_list and num is not None:
+ format_id_list.append(compat_str(num))
+ format_id = '-'.join(format_id_list)
+ format_note = ', '.join(filter(None, (kind, lang_note)))
+ item_id_list = []
+ if format_id:
+ item_id_list.append(format_id)
+ item_id_list.append('videomaterial')
+ playlist = self._download_json(
+ urljoin(url, playlist_url), video_id,
+ 'Downloading %s JSON' % ' '.join(item_id_list),
+ headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'X-CSRF-Token': csrf_token,
+ 'Referer': url,
+ 'Accept': 'application/json, text/javascript, */*; q=0.01',
+ }, fatal=False)
+ if not playlist:
+ continue
+ stream_url = url_or_none(playlist.get('streamurl'))
+ if stream_url:
+ rtmp = re.search(
+ r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
+ stream_url)
+ if rtmp:
+ formats.append({
+ 'url': rtmp.group('url'),
+ 'app': rtmp.group('app'),
+ 'play_path': rtmp.group('playpath'),
+ 'page_url': url,
+ 'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
+ 'rtmp_real_time': True,
+ 'format_id': 'rtmp',
+ 'ext': 'flv',
+ })
+ continue
+ start_video = playlist.get('startvideo', 0)
+ playlist = playlist.get('playlist')
+ if not playlist or not isinstance(playlist, list):
+ continue
+ playlist = playlist[start_video]
+ title = playlist.get('title')
+ if not title:
+ continue
+ description = playlist.get('description')
+ for source in playlist.get('sources', []):
+ file_ = source.get('file')
+ if not file_:
+ continue
+ ext = determine_ext(file_)
+ format_id_list = [lang, kind]
+ if ext == 'm3u8':
+ format_id_list.append('hls')
+ elif source.get('type') == 'video/dash' or ext == 'mpd':
+ format_id_list.append('dash')
+ format_id = '-'.join(filter(None, format_id_list))
+ if ext == 'm3u8':
+ file_formats = self._extract_m3u8_formats(
+ file_, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False)
+ elif source.get('type') == 'video/dash' or ext == 'mpd':
+                        # DASH sources are not extracted; skip them.
+                        continue
+ else:
+ continue
+ for f in file_formats:
+ f.update({
+ 'language': lang,
+ 'format_note': format_note,
+ })
+ formats.extend(file_formats)
+
+ return {
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ }
+
+ def extract_entries(html, video_id, common_info, num=None):
+ info = extract_info(html, video_id, num)
+
+ if info['formats']:
+ self._sort_formats(info['formats'])
+ f = common_info.copy()
+ f.update(info)
+ yield f
+
+ # Extract teaser/trailer only when full episode is not available
+ if not info['formats']:
+ m = re.search(
+ r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
+ html)
+ if m:
+ f = common_info.copy()
+ f.update({
+ 'id': '%s-%s' % (f['id'], m.group('kind').lower()),
+ 'title': m.group('title'),
+ 'url': urljoin(url, m.group('href')),
+ })
+ yield f
+
+ def extract_episodes(html):
+ for num, episode_html in enumerate(re.findall(
+ r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
+ episodebox_title = self._search_regex(
+ (r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
+ r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
+ episode_html, 'episodebox title', default=None, group='title')
+ if not episodebox_title:
+ continue
+
+ episode_number = int(self._search_regex(
+ r'(?:Episode|Film)\s*(\d+)',
+ episodebox_title, 'episode number', default=num))
+ episode_title = self._search_regex(
+ r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
+ episodebox_title, 'episode title', default=None)
+
+ video_id = 'episode-%d' % episode_number
+
+ common_info = {
+ 'id': video_id,
+ 'series': anime_title,
+ 'episode': episode_title,
+ 'episode_number': episode_number,
+ }
+
+ for e in extract_entries(episode_html, video_id, common_info):
+ yield e
+
+ def extract_film(html, video_id):
+ common_info = {
+ 'id': anime_id,
+ 'title': anime_title,
+ 'description': anime_description,
+ }
+ for e in extract_entries(html, video_id, common_info):
+ yield e
+
+ def entries():
+ has_episodes = False
+ for e in extract_episodes(webpage):
+ has_episodes = True
+ yield e
+
+ if not has_episodes:
+ for e in extract_film(webpage, anime_id):
+ yield e
+
+ return self.playlist_result(
+ entries(), anime_id, anime_title, anime_description)
diff --git a/hypervideo_dl/extractor/anvato.py b/hypervideo_dl/extractor/anvato.py
new file mode 100644
index 0000000..b739856
--- /dev/null
+++ b/hypervideo_dl/extractor/anvato.py
@@ -0,0 +1,381 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import hashlib
+import json
+import random
+import re
+import time
+
+from .common import InfoExtractor
+from ..aes import aes_encrypt
+from ..compat import compat_str
+from ..utils import (
+ bytes_to_intlist,
+ determine_ext,
+ intlist_to_bytes,
+ int_or_none,
+ strip_jsonp,
+ unescapeHTML,
+ unsmuggle_url,
+)
+
+
+def md5_text(s):
+ if not isinstance(s, compat_str):
+ s = compat_str(s)
+ return hashlib.md5(s.encode('utf-8')).hexdigest()
+
+
+class AnvatoIE(InfoExtractor):
+ _VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
+
+ # Copied from anvplayer.min.js
+ _ANVACK_TABLE = {
+ 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
+ 'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA',
+ 'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP',
+ 'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv',
+ 'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7',
+ 'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR',
+ 'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg',
+ 'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto',
+ 'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY',
+ 'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh',
+ 'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK',
+ 'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D',
+ 'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad',
+ 'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp',
+ 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih',
+ 'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR',
+ 'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW',
+ 'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su',
+ 'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q',
+ 'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5',
+ 'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3',
+ 'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI',
+ 'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s',
+ 'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz',
+ 'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg',
+ 'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x',
+ 'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH',
+ 'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX',
+ 'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc',
+ 'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK',
+ 'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7',
+ 'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C',
+ 'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e',
+ 'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1',
+ 'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re',
+ 'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51',
+ 'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho',
+ 'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9',
+ 'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH',
+ 'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F',
+ 'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo',
+ 'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR',
+ 'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa',
+ 'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk',
+ 'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ',
+ 'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ',
+ 'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m',
+ 'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b',
+ 'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3',
+ 'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK',
+ 'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
+ 'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
+ 'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F',
+ 'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx',
+ 'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ',
+ 'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH',
+ 'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm',
+ 'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt',
+ 'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl',
+ 'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b',
+ 'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV',
+ 'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg',
+ 'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk',
+ 'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT',
+ 'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa',
+ 'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv',
+ 'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k',
+ 'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI',
+ 'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr',
+ 'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw',
+ 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K',
+ 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH',
+ 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK',
+ 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu',
+ 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
+ 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
+ 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK',
+ 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n',
+ 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD',
+ 'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk',
+ 'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
+ 'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
+ 'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
+ 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
+ 'X8POa4zPPaKVZHqmWjuEzfP31b1QM9VN': 'Dn5vOY9ooDw7VSl9qztjZI5o0g08mA0z',
+ 'M2v78QkBMpNJlSPp9diX5F2PBmBy6Bog': 'ka6K32kyo7nDZfNkjQCGWf1lpApXMd1B',
+ 'bvJ0dQpav07l0hG5JgfVLF2dv1vARwpP': 'BzoQW24GrJZoJfmNodiJKSPeB9B8NOxj',
+ 'lxQMLg2XZKuEZaWgsqubBxV9INZ6bryY': 'Vm2Mx6noKds9jB71h6urazwlTG3m9x8l',
+ '04EnjvXeoSmkbJ9ckPs7oY0mcxv7PlyN': 'aXERQP9LMfQVlEDsgGs6eEA1SWznAQ8P',
+ 'mQbO2ge6BFRWVPYCYpU06YvNt80XLvAX': 'E2BV1NGmasN5v7eujECVPJgwflnLPm2A',
+ 'g43oeBzJrCml7o6fa5fRL1ErCdeD8z4K': 'RX34mZ6zVH4Nr6whbxIGLv9WSbxEKo8V',
+ 'VQrDJoP7mtdBzkxhXbSPwGB1coeElk4x': 'j2VejQx0VFKQepAF7dI0mJLKtOVJE18z',
+ 'WxA5NzLRjCrmq0NUgaU5pdMDuZO7RJ4w': 'lyY5ADLKaIOLEgAsGQCveEMAcqnx3rY9',
+ 'M4lpMXB71ie0PjMCjdFzVXq0SeRVqz49': 'n2zVkOqaLIv3GbLfBjcwW51LcveWOZ2e',
+ 'dyDZGEqN8u8nkJZcJns0oxYmtP7KbGAn': 'VXOEqQW9BtEVLajfZQSLEqxgS5B7qn2D',
+ 'E7QNjrVY5u5mGvgu67IoDgV1CjEND8QR': 'rz8AaDmdKIkLmPNhB5ILPJnjS5PnlL8d',
+ 'a4zrqjoKlfzg0dwHEWtP31VqcLBpjm4g': 'LY9J16gwETdGWa3hjBu5o0RzuoQDjqXQ',
+ 'dQP5BZroMsMVLO1hbmT5r2Enu86GjxA6': '7XR3oOdbPF6x3PRFLDCq9RkgsRjAo48V',
+ 'M4lKNBO1NFe0PjMCj1tzVXq0SeRVqzA9': 'n2zoRqGLRUv3GbLfBmTwW51LcveWOZYe',
+ 'nAZ7MZdpGCGg1pqFEbsoJOz2C60mv143': 'dYJgdqA9aT4yojETqGi7yNgoFADxqmXP',
+ '3y1MERYgOuE9NzbFgwhV6Wv2F0YKvbyz': '081xpZDQgC4VadLTavhWQxrku56DAgXV',
+ 'bmQvmEXr5HWklBMCZOcpE2Z3HBYwqGyl': 'zxXPbVNyMiMAZldhr9FkOmA0fl4aKr2v',
+ 'wA7oDNYldfr6050Hwxi52lPZiVlB86Ap': 'ZYK16aA7ni0d3l3c34uwpxD7CbReMm8Q',
+ 'g43MbKMWmFml7o7sJoSRkXxZiXRvJ3QK': 'RX3oBJonvs4Nr6rUWBCGn3matRGqJPXV',
+ 'mA9VdlqpLS0raGaSDvtoqNrBTzb8XY4q': '0XN4OjBD3fnW7r7IbmtJB4AyfOmlrE2r',
+ 'mAajOwgkGt17oGoFmEuklMP9H0GnW54d': 'lXbBLPGyzikNGeGujAuAJGjZiwLRxyXR',
+ 'vy8vjJ9kbUwrRqRu59Cj5dWZfzYErlAb': 'K8l7gpwaGcBpnAnCLNCmPZRdin3eaQX0',
+ 'xQMWBpR8oHEZaWaSMGUb0avOHjLVYn4Y': 'm2MrN4vEaf9jB7BFy5Srb40jTrN67AYl',
+ 'xyKEmVO3miRr6D6UVkt7oB8jtD6aJEAv': 'g2ddDebqDfqdgKgswyUKwGjbTWwzq923',
+ '7Qk0wa2D9FjKapacoJF27aLvUDKkLGA0': 'b2kgBEkephJaMkMTL7s1PLe4Ua6WyP2P',
+ '3QLg6nqmNTJ5VvVTo7f508LPidz1xwyY': 'g2L1GgpraipmAOAUqmIbBnPxHOmw4MYa',
+ '3y1B7zZjXTE9NZNSzZSVNPZaTNLjo6Qz': '081b5G6wzH4VagaURmcWbN5mT4JGEe2V',
+ 'lAqnwvkw6SG6D8DSqmUg6DRLUp0w3G4x': 'O2pbP0xPDFNJjpjIEvcdryOJtpkVM4X5',
+ 'awA7xd1N0Hr6050Hw2c52lPZiVlB864p': 'GZYKpn4aoT0d3l3c3PiwpxD7CbReMmXQ',
+ 'jQVqPLl9YHL1WGWtR1HDgWBGT63qRNyV': '6X03ne6vrU4oWyWUN7tQVoajikxJR3Ye',
+ 'GQRMR8mL7uZK797t7xH3eNzPIP5dOny1': 'm2vqPWGd4U31zWzSyasDRAoMT1PKRp8o',
+ 'zydq9RdmRhXLkNkfNoTJlMzaF0lWekQB': '3X7LnvE7vH5nkEkSqLiey793Un7dLB8e',
+ 'VQrDzwkB2IdBzjzu9MHPbEYkSB50gR4x': 'j2VebLzoKUKQeEesmVh0gM1eIp9jKz8z',
+ 'mAa2wMamBs17oGoFmktklMP9H0GnW54d': 'lXbgP74xZTkNGeGujVUAJGjZiwLRxy8R',
+ '7yjB6ZLG6sW8R6RF2xcan1KGfJ5dNoyd': 'wXQkPorvPHZ45N5t4Jf6qwg5Tp4xvw29',
+ 'a4zPpNeWGuzg0m0iX3tPeanGSkRKWXQg': 'LY9oa3QAyHdGW9Wu3Ri5JGeEik7l1N8Q',
+ 'k2rneA2M38k25cXDwwSknTJlxPxQLZ6M': '61lyA2aEVDzklfdwmmh31saPxQx2VRjp',
+ 'bK9Zk4OvPnvxduLgxvi8VUeojnjA02eV': 'o5jANYjbeMb4nfBaQvcLAt1jzLzYx6ze',
+ '5VD6EydM3R9orHmNMGInGCJwbxbQvGRw': 'w3zjmX7g4vnxzCxElvUEOiewkokXprkZ',
+ '70X35QbVYVYNPUmP9YfbzI06YqYQk2R1': 'vG4Aj2BMjMjoztB7zeFOnCVPJpJ8lMOa',
+ '26qYwQVG9p1Bks2GgBckjfDJOXOAMgG1': 'r4ev9X0mv5zqJc0yk5IBDcQOwZw8mnwQ',
+ 'rvVKpA56MBXWlSxMw3cobT5pdkd4Dm7q': '1J7ZkY53pZ645c93owcLZuveE7E8B3rL',
+ 'qN1zdy1zlYL23IWZGWtDvfV6WeWQWkJo': 'qN1zdy1zlYL23IWZGWtDvfV6WeWQWkJo',
+ 'jdKqRGF16dKsBviMDae7IGDl7oTjEbVV': 'Q09l7vhlNxPFErIOK6BVCe7KnwUW5DVV',
+ '3QLkogW1OUJ5VvPsrDH56DY2u7lgZWyY': 'g2LRE1V9espmAOPhE4ubj4ZdUA57yDXa',
+ 'wyJvWbXGBSdbkEzhv0CW8meou82aqRy8': 'M2wolPvyBIpQGkbT4juedD4ruzQGdK2y',
+ '7QkdZrzEkFjKap6IYDU2PB0oCNZORmA0': 'b2kN1l96qhJaMkPs9dt1lpjBfwqZoA8P',
+ 'pvA05113MHG1w3JTYxc6DVlRCjErVz4O': 'gQXeAbblBUnDJ7vujbHvbRd1cxlz3AXO',
+ 'mA9blJDZwT0raG1cvkuoeVjLC7ZWd54q': '0XN9jRPwMHnW7rvumgfJZOD9CJgVkWYr',
+ '5QwRN5qKJTvGKlDTmnf7xwNZcjRmvEy9': 'R2GP6LWBJU1QlnytwGt0B9pytWwAdDYy',
+ 'eyn5rPPbkfw2KYxH32fG1q58CbLJzM40': 'p2gyqooZnS56JWeiDgfmOy1VugOQEBXn',
+ '3BABn3b5RfPJGDwilbHe7l82uBoR05Am': '7OYZG7KMVhbPdKJS3xcWEN3AuDlLNmXj',
+ 'xA5zNGXD3HrmqMlF6OS5pdMDuZO7RJ4w': 'yY5DAm6r1IOLE3BCVMFveEMAcqnx3r29',
+ 'g43PgW3JZfml7o6fDEURL1ErCdeD8zyK': 'RX3aQn1zrS4Nr6whDgCGLv9WSbxEKo2V',
+ 'lAqp8WbGgiG6D8LTKJcg3O72CDdre1Qx': 'O2pnm6473HNJjpKuVosd3vVeh975yrX5',
+ 'wyJbYEDxKSdbkJ6S6RhW8meou82aqRy8': 'M2wPm7EgRSpQGlAh70CedD4ruzQGdKYy',
+ 'M4lgW28nLCe0PVdtaXszVXq0SeRVqzA9': 'n2zmJvg4jHv3G0ETNgiwW51LcveWOZ8e',
+ '5Qw3OVvp9FvGKlDTmOC7xwNZcjRmvEQ9': 'R2GzDdml9F1Qlnytw9s0B9pytWwAdD8y',
+ 'vy8a98X7zCwrRqbHrLUjYzwDiK2b70Qb': 'K8lVwzyjZiBpnAaSGeUmnAgxuGOBxmY0',
+ 'g4eGjJLLoiqRD3Pf9oT5O03LuNbLRDQp': '6XqD59zzpfN4EwQuaGt67qNpSyRBlnYy',
+ 'g43OPp9boIml7o6fDOIRL1ErCdeD8z4K': 'RX33alNB4s4Nr6whDPUGLv9WSbxEKoXV',
+ 'xA2ng9OkBcGKzDbTkKsJlx7dUK8R3dA5': 'z2aPnJvzBfObkwGC3vFaPxeBhxoMqZ8K',
+ 'xyKEgBajZuRr6DEC0Kt7XpD1cnNW9gAv': 'g2ddlEBvRsqdgKaI4jUK9PrgfMexGZ23',
+ 'BAogww51jIMa2JnH1BcYpXM5F658RNAL': 'rYWDmm0KptlkGv4FGJFMdZmjs9RDE6XR',
+ 'BAokpg62VtMa2JnH1mHYpXM5F658RNAL': 'rYWryDnlNslkGv4FG4HMdZmjs9RDE62R',
+ 'a4z1Px5e2hzg0m0iMMCPeanGSkRKWXAg': 'LY9eorNQGUdGW9WuKKf5JGeEik7l1NYQ',
+ 'kAx69R58kF9nY5YcdecJdl2pFXP53WyX': 'gXyRxELpbfPvLeLSaRil0mp6UEzbZJ8L',
+ 'BAoY13nwViMa2J2uo2cY6BlETgmdwryL': 'rYWwKzJmNFlkGvGtNoUM9bzwIJVzB1YR',
+ }
+
+ _MCP_TO_ACCESS_KEY_TABLE = {
+ 'qa': 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922',
+ 'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749',
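+ # 'univison' below is kept verbatim (sic): this table is keyed by the
+ # lowercased mcp value found in embeds, which presumably uses that spelling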
+ 'univison': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
+ 'uni': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
+ 'dev': 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a',
+ 'sps': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
+ 'spsstg': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
+ 'anv': 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3',
+ 'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
+ 'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
+ 'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
+ 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
+ }
+
+ _API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
+
+ _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
+ _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
+
+ _TESTS = [{
+ # from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
+ 'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
+ 'info_dict': {
+ 'id': '4465496',
+ 'ext': 'mp4',
+ 'title': 'VIDEO: Humpback whale breaches right next to NH boat',
+ 'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
+ 'duration': 22,
+ 'timestamp': 1534855680,
+ 'upload_date': '20180821',
+ 'uploader': 'ANV',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
+ 'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
+ 'only_matching': True,
+ }]
+
+ def __init__(self, *args, **kwargs):
+ super(AnvatoIE, self).__init__(*args, **kwargs)
+ self.__server_time = None
+
+ def _server_time(self, access_key, video_id):
+ if self.__server_time is not None:
+ return self.__server_time
+
+ self.__server_time = int(self._download_json(
+ self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id,
+ note='Fetching server time')['server_time'])
+
+ return self.__server_time
+
+ def _api_prefix(self, access_key):
+ return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage')
+
+ def _get_video_json(self, access_key, video_id):
+ # See et() in anvplayer.min.js, which is an alias of getVideoJSON()
+ video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key)
+ server_time = self._server_time(access_key, video_id)
+ input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time))
+
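+ # Request signing: the first 64 characters of input_data are AES-encrypted
+ # with the static _AUTH_KEY and sent base64-encoded as X-Anvato-Adst-Auth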
+ auth_secret = intlist_to_bytes(aes_encrypt(
+ bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
+
+ video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
+ anvrid = md5_text(time.time() * 1000 * random.random())[:30]
+ api = {
+ 'anvrid': anvrid,
+ 'anvts': server_time,
+ }
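+ # anvstk is an md5 signature over the access key, request id, server time
+ # and the per-access-key secret (falling back to the generic _API_KEY)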
+ api['anvstk'] = md5_text('%s|%s|%d|%s' % (
+ access_key, anvrid, server_time,
+ self._ANVACK_TABLE.get(access_key, self._API_KEY)))
+
+ return self._download_json(
+ video_data_url, video_id, transform_source=strip_jsonp,
+ data=json.dumps({'api': api}).encode('utf-8'))
+
+ def _get_anvato_videos(self, access_key, video_id):
+ video_data = self._get_video_json(access_key, video_id)
+
+ formats = []
+ for published_url in video_data['published_urls']:
+ video_url = published_url['embed_url']
+ media_format = published_url.get('format')
+ ext = determine_ext(video_url)
+
+ if ext == 'smil' or media_format == 'smil':
+ formats.extend(self._extract_smil_formats(video_url, video_id))
+ continue
+
+ tbr = int_or_none(published_url.get('kbps'))
+ a_format = {
+ 'url': video_url,
+ 'format_id': ('-'.join(filter(None, ['http', published_url.get('cdn_name')]))).lower(),
+ 'tbr': tbr if tbr != 0 else None,
+ }
+
+ if media_format == 'm3u8' and tbr is not None:
+ a_format.update({
+ 'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
+ 'ext': 'mp4',
+ })
+ elif media_format == 'm3u8-variant' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+ elif ext == 'mp3' or media_format == 'mp3':
+ a_format['vcodec'] = 'none'
+ else:
+ a_format.update({
+ 'width': int_or_none(published_url.get('width')),
+ 'height': int_or_none(published_url.get('height')),
+ })
+ formats.append(a_format)
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for caption in video_data.get('captions', []):
+ a_caption = {
+ 'url': caption['url'],
+ 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
+ }
+ subtitles.setdefault(caption['language'], []).append(a_caption)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': video_data.get('def_title'),
+ 'description': video_data.get('def_description'),
+ 'tags': (video_data.get('def_tags') or '').split(','),
+ 'categories': video_data.get('categories'),
+ 'thumbnail': video_data.get('src_image_url') or video_data.get('thumbnail'),
+ 'timestamp': int_or_none(video_data.get(
+ 'ts_published') or video_data.get('ts_added')),
+ 'uploader': video_data.get('mcp_id'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'subtitles': subtitles,
+ }
+
+ @staticmethod
+ def _extract_urls(ie, webpage, video_id):
+ entries = []
+ for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
+ anvplayer_data = ie._parse_json(
+ mobj.group('anvp'), video_id, transform_source=unescapeHTML,
+ fatal=False)
+ if not anvplayer_data:
+ continue
+ video = anvplayer_data.get('video')
+ if not isinstance(video, compat_str) or not video.isdigit():
+ continue
+ access_key = anvplayer_data.get('accessKey')
+ if not access_key:
+ mcp = anvplayer_data.get('mcp')
+ if mcp:
+ access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get(
+ mcp.lower())
+ if not access_key:
+ continue
+ entries.append(ie.url_result(
+ 'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(),
+ video_id=video))
+ return entries
+
+ def _extract_anvato_videos(self, webpage, video_id):
+ anvplayer_data = self._parse_json(
+ self._html_search_regex(
+ self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
+ video_id)
+ return self._get_anvato_videos(
+ anvplayer_data['accessKey'], anvplayer_data['video'])
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
+
+ mobj = re.match(self._VALID_URL, url)
+ access_key, video_id = mobj.group('access_key_or_mcp', 'id')
+ if access_key not in self._ANVACK_TABLE:
+ access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
+ access_key) or access_key
+ return self._get_anvato_videos(access_key, video_id)
diff --git a/hypervideo_dl/extractor/aol.py b/hypervideo_dl/extractor/aol.py
new file mode 100644
index 0000000..f6ecb84
--- /dev/null
+++ b/hypervideo_dl/extractor/aol.py
@@ -0,0 +1,139 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .yahoo import YahooIE
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ url_or_none,
+)
+
+
+class AolIE(YahooIE):
+ IE_NAME = 'aol.com'
+ _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
+
+ _TESTS = [{
+ # video with 5min ID
+ 'url': 'https://www.aol.com/video/view/u-s--official-warns-of-largest-ever-irs-phone-scam/518167793/',
+ 'md5': '18ef68f48740e86ae94b98da815eec42',
+ 'info_dict': {
+ 'id': '518167793',
+ 'ext': 'mp4',
+ 'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
+ 'description': 'A major phone scam has cost thousands of taxpayers more than $1 million, with less than a month until income tax returns are due to the IRS.',
+ 'timestamp': 1395405060,
+ 'upload_date': '20140321',
+ 'uploader': 'Newsy Studio',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # video with vidible ID
+ 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
+ 'info_dict': {
+ 'id': '5707d6b8e4b090497b04f706',
+ 'ext': 'mp4',
+ 'title': 'Netflix is Raising Rates',
+ 'description': 'Netflix is rewarding millions of it’s long-standing members with an increase in cost. Veuer’s Carly Figueroa has more.',
+ 'upload_date': '20160408',
+ 'timestamp': 1460123280,
+ 'uploader': 'Veuer',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.com/video/view/donald-trump-spokeswoman-tones-down-megyn-kelly-attacks/519442220/',
+ 'only_matching': True,
+ }, {
+ 'url': 'aol-video:5707d6b8e4b090497b04f706',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.ca/video/view/u-s-woman-s-family-arrested-for-murder-first-pinned-on-panhandler-police/5c7ccf45bc03931fa04b2fe1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.co.uk/video/view/-one-dead-and-22-hurt-in-bus-crash-/5cb3a6f3d21f1a072b457347/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.de/video/view/eva-braun-privataufnahmen-von-hitlers-geliebter-werden-digitalisiert/5cb2d49de98ab54c113d3d5d/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
+ 'only_matching': True,
+ }, {
+ # Yahoo video
+ 'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
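+ # Of the ID formats accepted by _VALID_URL, only Yahoo UUIDs contain
+ # dashes, so hand those off to the inherited Yahoo extractor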
+ if '-' in video_id:
+ return self._extract_yahoo_video(video_id, 'us')
+
+ response = self._download_json(
+ 'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
+ video_id)['response']
+ if response['statusText'] != 'Ok':
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True)
+
+ video_data = response['data']
+ formats = []
+ m3u8_url = url_or_none(video_data.get('videoMasterPlaylist'))
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ for rendition in video_data.get('renditions', []):
+ video_url = url_or_none(rendition.get('url'))
+ if not video_url:
+ continue
+ ext = rendition.get('format')
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ else:
+ f = {
+ 'url': video_url,
+ 'format_id': rendition.get('quality'),
+ }
+ mobj = re.search(r'(\d+)x(\d+)', video_url)
+ if mobj:
+ f.update({
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ })
+ else:
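+ # No WxH in the URL path; fall back to the w/h query parameters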
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(video_url).query)
+ f.update({
+ 'width': int_or_none(qs.get('w', [None])[0]),
+ 'height': int_or_none(qs.get('h', [None])[0]),
+ })
+ formats.append(f)
+ self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
+
+ return {
+ 'id': video_id,
+ 'title': video_data['title'],
+ 'duration': int_or_none(video_data.get('duration')),
+ 'timestamp': int_or_none(video_data.get('publishDate')),
+ 'view_count': int_or_none(video_data.get('views')),
+ 'description': video_data.get('description'),
+ 'uploader': video_data.get('videoOwner'),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/apa.py b/hypervideo_dl/extractor/apa.py
new file mode 100644
index 0000000..cbc1c0e
--- /dev/null
+++ b/hypervideo_dl/extractor/apa.py
@@ -0,0 +1,95 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ url_or_none,
+)
+
+
+class APAIE(InfoExtractor):
+ _VALID_URL = r'(?P<base_url>https?://[^/]+\.apa\.at)/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
+ 'md5': '2b12292faeb0a7d930c778c7a5b4759b',
+ 'info_dict': {
+ 'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
+ 'ext': 'mp4',
+ 'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://uvp-rma.sf.apa.at/embed/70404cca-2f47-4855-bbb8-20b1fae58f76',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id, base_url = mobj.group('id', 'base_url')
+
+ webpage = self._download_webpage(
+ '%s/player/%s' % (base_url, video_id), video_id)
+
+ jwplatform_id = self._search_regex(
+ r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
+ 'jwplatform id', default=None)
+
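+ # Some players are thin wrappers around JWPlatform; delegate when a
+ # media id is present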
+ if jwplatform_id:
+ return self.url_result(
+ 'jwplatform:' + jwplatform_id, ie='JWPlatform',
+ video_id=video_id)
+
+ def extract(field, name=None):
+ return self._search_regex(
+ r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
+ webpage, name or field, default=None, group='value')
+
+ title = extract('title') or video_id
+ description = extract('description')
+ thumbnail = extract('poster', 'thumbnail')
+
+ formats = []
+ for format_id in ('hls', 'progressive'):
+ source_url = url_or_none(extract(format_id))
+ if not source_url:
+ continue
+ ext = determine_ext(source_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ height = int_or_none(self._search_regex(
+ r'(\d+)\.mp4', source_url, 'height', default=None))
+ formats.append({
+ 'url': source_url,
+ 'format_id': format_id,
+ 'height': height,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/aparat.py b/hypervideo_dl/extractor/aparat.py
new file mode 100644
index 0000000..a9527e7
--- /dev/null
+++ b/hypervideo_dl/extractor/aparat.py
@@ -0,0 +1,89 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ get_element_by_id,
+ int_or_none,
+ merge_dicts,
+ mimetype2ext,
+ url_or_none,
+)
+
+
+class AparatIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.aparat.com/v/wP8On',
+ 'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
+ 'info_dict': {
+ 'id': 'wP8On',
+ 'ext': 'mp4',
+ 'title': 'تیم گلکسی 11 - زومیت',
+ 'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
+ 'duration': 231,
+ 'timestamp': 1387394859,
+ 'upload_date': '20131218',
+ 'view_count': int,
+ },
+ }, {
+ # multiple formats
+ 'url': 'https://www.aparat.com/v/8dflw/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ # The regular page provides more metadata; if it cannot be downloaded,
+ # fall back to the embed page below
+ webpage = self._download_webpage(url, video_id, fatal=False)
+
+ if not webpage:
+ webpage = self._download_webpage(
+ 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
+ video_id)
+
+ options = self._parse_json(self._search_regex(
+ r'options\s*=\s*({.+?})\s*;', webpage, 'options'), video_id)
+
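+ # options.multiSRC is a list of source groups, each a list of
+ # {src, type, label} dicts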
+ formats = []
+ for sources in (options.get('multiSRC') or []):
+ for item in sources:
+ if not isinstance(item, dict):
+ continue
+ file_url = url_or_none(item.get('src'))
+ if not file_url:
+ continue
+ item_type = item.get('type')
+ if item_type == 'application/vnd.apple.mpegurl':
+ formats.extend(self._extract_m3u8_formats(
+ file_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ else:
+ ext = mimetype2ext(item.get('type'))
+ label = item.get('label')
+ formats.append({
+ 'url': file_url,
+ 'ext': ext,
+ 'format_id': 'http-%s' % (label or ext),
+ 'height': int_or_none(self._search_regex(
+ r'(\d+)[pP]', label or '', 'height',
+ default=None)),
+ })
+ self._sort_formats(
+ formats, field_preference=('height', 'width', 'tbr', 'format_id'))
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ if not info.get('title'):
+ info['title'] = get_element_by_id('videoTitle', webpage) or \
+ self._html_search_meta(['og:title', 'twitter:title', 'DC.Title', 'title'], webpage, fatal=True)
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'thumbnail': url_or_none(options.get('poster')),
+ 'duration': int_or_none(options.get('duration')),
+ 'formats': formats,
+ })
diff --git a/hypervideo_dl/extractor/appleconnect.py b/hypervideo_dl/extractor/appleconnect.py
new file mode 100644
index 0000000..a84b8b1
--- /dev/null
+++ b/hypervideo_dl/extractor/appleconnect.py
@@ -0,0 +1,50 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ str_to_int,
+ ExtractorError
+)
+
+
+class AppleConnectIE(InfoExtractor):
+ _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
+ _TEST = {
+ 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
+ 'md5': 'e7c38568a01ea45402570e6029206723',
+ 'info_dict': {
+ 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
+ 'ext': 'm4v',
+ 'title': 'Energy',
+ 'uploader': 'Drake',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20150710',
+ 'timestamp': 1436545535,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
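+ # The post page embeds the video metadata as a JSON blob inside an
+ # element with class "auc-video-data"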
+ try:
+ video_json = self._html_search_regex(
+ r'class="auc-video-data">(\{.*?\})', webpage, 'json')
+ except ExtractorError:
+ raise ExtractorError('This post doesn\'t contain a video', expected=True)
+
+ video_data = self._parse_json(video_json, video_id)
+ timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
+ like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
+
+ return {
+ 'id': video_id,
+ 'url': video_data['sslSrc'],
+ 'title': video_data['title'],
+ 'description': video_data['description'],
+ 'uploader': video_data['artistName'],
+ 'thumbnail': video_data['artworkUrl'],
+ 'timestamp': timestamp,
+ 'like_count': like_count,
+ }
diff --git a/hypervideo_dl/extractor/applepodcasts.py b/hypervideo_dl/extractor/applepodcasts.py
new file mode 100644
index 0000000..6a74de7
--- /dev/null
+++ b/hypervideo_dl/extractor/applepodcasts.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_podcast_url,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class ApplePodcastsIE(InfoExtractor):
+ _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
+ 'md5': 'df02e6acb11c10e844946a39e7222b08',
+ 'info_dict': {
+ 'id': '1000482637777',
+ 'ext': 'mp3',
+ 'title': '207 - Whitney Webb Returns',
+ 'description': 'md5:13a73bade02d2e43737751e3987e1399',
+ 'upload_date': '20200705',
+ 'timestamp': 1593921600,
+ 'duration': 6425,
+ 'series': 'The Tim Dillon Show',
+ }
+ }, {
+ 'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns?i=1000482637777',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://podcasts.apple.com/podcast/id1135137367?i=1000482637777',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ webpage = self._download_webpage(url, episode_id)
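+ # Episode metadata is embedded in the page as an Ember data store
+ # (a JSON blob in the shoebox-ember-data-store element)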
+ ember_data = self._parse_json(self._search_regex(
+ r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
+ webpage, 'ember data'), episode_id)
+ ember_data = ember_data.get(episode_id) or ember_data
+ episode = ember_data['data']['attributes']
+ description = episode.get('description') or {}
+
+ series = None
+ for inc in (ember_data.get('included') or []):
+ if inc.get('type') == 'media/podcast':
+ series = try_get(inc, lambda x: x['attributes']['name'])
+
+ return {
+ 'id': episode_id,
+ 'title': episode['name'],
+ 'url': clean_podcast_url(episode['assetUrl']),
+ 'description': description.get('standard') or description.get('short'),
+ 'timestamp': parse_iso8601(episode.get('releaseDateTime')),
+ 'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
+ 'series': series,
+ }
diff --git a/hypervideo_dl/extractor/appletrailers.py b/hypervideo_dl/extractor/appletrailers.py
new file mode 100644
index 0000000..10442a5
--- /dev/null
+++ b/hypervideo_dl/extractor/appletrailers.py
@@ -0,0 +1,283 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ unified_strdate,
+)
+
+
+class AppleTrailersIE(InfoExtractor):
+ IE_NAME = 'appletrailers'
+ _VALID_URL = r'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
+ 'info_dict': {
+ 'id': '5111',
+ 'title': 'Man of Steel',
+ },
+ 'playlist': [
+ {
+ 'md5': 'd97a8e575432dbcb81b7c3acb741f8a8',
+ 'info_dict': {
+ 'id': 'manofsteel-trailer4',
+ 'ext': 'mov',
+ 'duration': 111,
+ 'title': 'Trailer 4',
+ 'upload_date': '20130523',
+ 'uploader_id': 'wb',
+ },
+ },
+ {
+ 'md5': 'b8017b7131b721fb4e8d6f49e1df908c',
+ 'info_dict': {
+ 'id': 'manofsteel-trailer3',
+ 'ext': 'mov',
+ 'duration': 182,
+ 'title': 'Trailer 3',
+ 'upload_date': '20130417',
+ 'uploader_id': 'wb',
+ },
+ },
+ {
+ 'md5': 'd0f1e1150989b9924679b441f3404d48',
+ 'info_dict': {
+ 'id': 'manofsteel-trailer',
+ 'ext': 'mov',
+ 'duration': 148,
+ 'title': 'Trailer',
+ 'upload_date': '20121212',
+ 'uploader_id': 'wb',
+ },
+ },
+ {
+ 'md5': '5fe08795b943eb2e757fa95cb6def1cb',
+ 'info_dict': {
+ 'id': 'manofsteel-teaser',
+ 'ext': 'mov',
+ 'duration': 93,
+ 'title': 'Teaser',
+ 'upload_date': '20120721',
+ 'uploader_id': 'wb',
+ },
+ },
+ ]
+ }, {
+ 'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
+ 'info_dict': {
+ 'id': '4489',
+ 'title': 'Blackthorn',
+ },
+ 'playlist_mincount': 2,
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }, {
+ # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
+ 'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
+ 'info_dict': {
+ 'id': '15881',
+ 'title': 'Kung Fu Panda 3',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ 'url': 'http://trailers.apple.com/ca/metropole/autrui/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/',
+ 'only_matching': True,
+ }]
+
+ _JSON_RE = r'iTunes\.playURL\((.*?)\);'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ movie = mobj.group('movie')
+ uploader_id = mobj.group('company')
+
+ webpage = self._download_webpage(url, movie)
+ film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
+ film_data = self._download_json(
+ 'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
+ film_id, fatal=False)
+
+ if film_data:
+ entries = []
+ for clip in film_data.get('clips', []):
+ clip_title = clip['title']
+
+ formats = []
+ for version, version_data in clip.get('versions', {}).items():
+ for size, size_data in version_data.get('sizes', {}).items():
+ src = size_data.get('src')
+ if not src:
+ continue
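+ # As in the fallback path below, the listed src is a reference movie;
+ # rewriting _NNNp.mov to _hNNNp.mov yields the real video file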
+ formats.append({
+ 'format_id': '%s-%s' % (version, size),
+ 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
+ 'width': int_or_none(size_data.get('width')),
+ 'height': int_or_none(size_data.get('height')),
+ 'language': version[:2],
+ })
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
+ 'formats': formats,
+ 'title': clip_title,
+ 'thumbnail': clip.get('screen') or clip.get('thumb'),
+ 'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
+ 'upload_date': unified_strdate(clip.get('posted')),
+ 'uploader_id': uploader_id,
+ })
+
+ page_data = film_data.get('page', {})
+ return self.playlist_result(entries, film_id, page_data.get('movie_title'))
+
+ playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
+
+ def fix_html(s):
+ s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
+ s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)
+ # The ' in the onClick attributes is not escaped, so pages such as
+ # http://trailers.apple.com/trailers/wb/gravity/ could not be parsed
+ # as XML without first re-escaping it (see _clean_json below)
+
+ def _clean_json(m):
+ return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+ s = re.sub(self._JSON_RE, _clean_json, s)
+ s = '<html>%s</html>' % s
+ return s
+ doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
+
+ playlist = []
+ for li in doc.findall('./div/ul/li'):
+ on_click = li.find('.//a').attrib['onClick']
+ trailer_info_json = self._search_regex(self._JSON_RE,
+ on_click, 'trailer info')
+ trailer_info = json.loads(trailer_info_json)
+ first_url = trailer_info.get('url')
+ if not first_url:
+ continue
+ title = trailer_info['title']
+ video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
+ thumbnail = li.find('.//img').attrib['src']
+ upload_date = trailer_info['posted'].replace('-', '')
+
+ runtime = trailer_info['runtime']
+ m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
+ duration = None
+ if m:
+ duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
+
+ trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
+ settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
+ settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
+
+ formats = []
+ for format in settings['metadata']['sizes']:
+ # The listed src is a reference movie pointing at the real video
+ # file; inserting 'h' (_720p.mov -> _h720p.mov) selects the latter
+ format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
+ formats.append({
+ 'url': format_url,
+ 'format': format['type'],
+ 'width': int_or_none(format['width']),
+ 'height': int_or_none(format['height']),
+ })
+
+ self._sort_formats(formats)
+
+ playlist.append({
+ '_type': 'video',
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'uploader_id': uploader_id,
+ 'http_headers': {
+ 'User-Agent': 'QuickTime compatible (hypervideo)',
+ },
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': movie,
+ 'entries': playlist,
+ }
+
+
+class AppleTrailersSectionIE(InfoExtractor):
+ IE_NAME = 'appletrailers:section'
+ _SECTIONS = {
+ 'justadded': {
+ 'feed_path': 'just_added',
+ 'title': 'Just Added',
+ },
+ 'exclusive': {
+ 'feed_path': 'exclusive',
+ 'title': 'Exclusive',
+ },
+ 'justhd': {
+ 'feed_path': 'just_hd',
+ 'title': 'Just HD',
+ },
+ 'mostpopular': {
+ 'feed_path': 'most_pop',
+ 'title': 'Most Popular',
+ },
+ 'moviestudios': {
+ 'feed_path': 'studios',
+ 'title': 'Movie Studios',
+ },
+ }
+ _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
+ _TESTS = [{
+ 'url': 'http://trailers.apple.com/#section=justadded',
+ 'info_dict': {
+ 'title': 'Just Added',
+ 'id': 'justadded',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=exclusive',
+ 'info_dict': {
+ 'title': 'Exclusive',
+ 'id': 'exclusive',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=justhd',
+ 'info_dict': {
+ 'title': 'Just HD',
+ 'id': 'justhd',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=mostpopular',
+ 'info_dict': {
+ 'title': 'Most Popular',
+ 'id': 'mostpopular',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=moviestudios',
+ 'info_dict': {
+ 'title': 'Movie Studios',
+ 'id': 'moviestudios',
+ },
+ 'playlist_mincount': 80,
+ }]
+
+ def _real_extract(self, url):
+ section = self._match_id(url)
+ section_data = self._download_json(
+ 'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
+ section)
+ entries = [
+ self.url_result('http://trailers.apple.com' + e['location'])
+ for e in section_data]
+ return self.playlist_result(entries, section, self._SECTIONS[section]['title'])
diff --git a/hypervideo_dl/extractor/archiveorg.py b/hypervideo_dl/extractor/archiveorg.py
new file mode 100644
index 0000000..e42ed5e
--- /dev/null
+++ b/hypervideo_dl/extractor/archiveorg.py
@@ -0,0 +1,95 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ extract_attributes,
+ unified_strdate,
+ unified_timestamp,
+)
+
+
+class ArchiveOrgIE(InfoExtractor):
+ IE_NAME = 'archive.org'
+ IE_DESC = 'archive.org videos'
+ _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
+ 'md5': '8af1d4cf447933ed3c7f4871162602db',
+ 'info_dict': {
+ 'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
+ 'ext': 'ogg',
+ 'title': '1968 Demo - FJCC Conference Presentation Reel #1',
+ 'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
+ 'creator': 'SRI International',
+ 'release_date': '19681210',
+ 'uploader': 'SRI International',
+ 'timestamp': 1268695290,
+ 'upload_date': '20100315',
+ }
+ }, {
+ 'url': 'https://archive.org/details/Cops1922',
+ 'md5': '0869000b4ce265e8ca62738b336b268a',
+ 'info_dict': {
+ 'id': 'Cops1922',
+ 'ext': 'mp4',
+ 'title': 'Buster Keaton\'s "Cops" (1922)',
+ 'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
+ 'timestamp': 1387699629,
+ 'upload_date': '20131222',
+ }
+ }, {
+ 'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://archive.org/details/MSNBCW_20131125_040000_To_Catch_a_Predator/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'http://archive.org/embed/' + video_id, video_id)
+
+ playlist = None
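+ # Newer embed pages expose the playlist JSON via the value attribute
+ # of a js-play8-playlist element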
+ play8 = self._search_regex(
+ r'(<[^>]+\bclass=["\']js-play8-playlist[^>]+>)', webpage,
+ 'playlist', default=None)
+ if play8:
+ attrs = extract_attributes(play8)
+ playlist = attrs.get('value')
+ if not playlist:
+ # Old jwplayer fallback
+ playlist = self._search_regex(
+ r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
+ webpage, 'jwplayer playlist', default='[]')
+ jwplayer_playlist = self._parse_json(playlist, video_id, fatal=False)
+ if jwplayer_playlist:
+ info = self._parse_jwplayer_data(
+ {'playlist': jwplayer_playlist}, video_id, base_url=url)
+ else:
+ # HTML5 media fallback
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+ info['id'] = video_id
+
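+ # archive.org's ?output=json metadata wraps most fields in
+ # single-element lists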
+ def get_optional(metadata, field):
+ return metadata.get(field, [None])[0]
+
+ metadata = self._download_json(
+ 'http://archive.org/details/' + video_id, video_id, query={
+ 'output': 'json',
+ })['metadata']
+ info.update({
+ 'title': get_optional(metadata, 'title') or info.get('title'),
+ 'description': clean_html(get_optional(metadata, 'description')),
+ })
+ if info.get('_type') != 'playlist':
+ creator = get_optional(metadata, 'creator')
+ info.update({
+ 'creator': creator,
+ 'release_date': unified_strdate(get_optional(metadata, 'date')),
+ 'uploader': get_optional(metadata, 'publisher') or creator,
+ 'timestamp': unified_timestamp(get_optional(metadata, 'publicdate')),
+ 'language': get_optional(metadata, 'language'),
+ })
+ return info
diff --git a/hypervideo_dl/extractor/arcpublishing.py b/hypervideo_dl/extractor/arcpublishing.py
new file mode 100644
index 0000000..ca6a6c4
--- /dev/null
+++ b/hypervideo_dl/extractor/arcpublishing.py
@@ -0,0 +1,174 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class ArcPublishingIE(InfoExtractor):
+ _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
+ _VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
+ _TESTS = [{
+ # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
+ 'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
+ 'only_matching': True,
+ }, {
+ # https://www.bostonglobe.com/video/2020/12/30/metro/footage-released-showing-officer-talking-about-striking-protesters-with-car/
+ 'url': 'arcpublishing:bostonglobe:232b7ae6-7d73-432d-bc0a-85dbf0119ab1',
+ 'only_matching': True,
+ }, {
+ # https://www.actionnewsjax.com/video/live-stream/
+ 'url': 'arcpublishing:cmg:cfb1cf1b-3ab5-4d1b-86c5-a5515d311f2a',
+ 'only_matching': True,
+ }, {
+ # https://elcomercio.pe/videos/deportes/deporte-total-futbol-peruano-seleccion-peruana-la-valorizacion-de-los-peruanos-en-el-exterior-tras-un-2020-atipico-nnav-vr-video-noticia/
+ 'url': 'arcpublishing:elcomercio:27a7e1f8-2ec7-4177-874f-a4feed2885b3',
+ 'only_matching': True,
+ }, {
+ # https://www.clickondetroit.com/video/community/2020/05/15/events-surrounding-woodward-dream-cruise-being-canceled/
+ 'url': 'arcpublishing:gmg:c8793fb2-8d44-4242-881e-2db31da2d9fe',
+ 'only_matching': True,
+ }, {
+ # https://www.wabi.tv/video/2020/12/30/trenton-company-making-equipment-pfizer-covid-vaccine/
+ 'url': 'arcpublishing:gray:0b0ba30e-032a-4598-8810-901d70e6033e',
+ 'only_matching': True,
+ }, {
+ # https://www.lateja.cr/el-mundo/video-china-aprueba-con-condiciones-su-primera/dfcbfa57-527f-45ff-a69b-35fe71054143/video/
+ 'url': 'arcpublishing:gruponacion:dfcbfa57-527f-45ff-a69b-35fe71054143',
+ 'only_matching': True,
+ }, {
+ # https://www.fifthdomain.com/video/2018/03/09/is-america-vulnerable-to-a-cyber-attack/
+ 'url': 'arcpublishing:mco:aa0ca6fe-1127-46d4-b32c-be0d6fdb8055',
+ 'only_matching': True,
+ }, {
+ # https://www.vl.no/kultur/2020/12/09/en-melding-fra-en-lytter-endret-julelista-til-lewi-bergrud/
+ 'url': 'arcpublishing:mentormedier:47a12084-650b-4011-bfd0-3699b6947b2d',
+ 'only_matching': True,
+ }, {
+ # https://www.14news.com/2020/12/30/whiskey-theft-caught-camera-henderson-liquor-store/
+ 'url': 'arcpublishing:raycom:b89f61f8-79fa-4c09-8255-e64237119bf7',
+ 'only_matching': True,
+ }, {
+ # https://www.theglobeandmail.com/world/video-ethiopian-woman-who-became-symbol-of-integration-in-italy-killed-on/
+ 'url': 'arcpublishing:tgam:411b34c1-8701-4036-9831-26964711664b',
+ 'only_matching': True,
+ }, {
+ # https://www.pilotonline.com/460f2931-8130-4719-8ea1-ffcb2d7cb685-132.html
+ 'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685',
+ 'only_matching': True,
+ }]
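+ # Known org slug -> API host template pairs for POWA player deployments;
+ # orgs not listed here fall back to the generic template below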
+ _POWA_DEFAULTS = [
+ (['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'),
+ ([
+ 'adn', 'advancelocal', 'answers', 'bonnier', 'bostonglobe', 'demo',
+ 'gmg', 'gruponacion', 'infobae', 'mco', 'nzme', 'pmn', 'raycom',
+ 'spectator', 'tbt', 'tgam', 'tronc', 'wapo', 'wweek',
+ ], 'video-api-cdn.%s.arcpublishing.com/api'),
+ ]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ entries = []
+ # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
+ for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
+ powa = extract_attributes(powa_el) or {}
+ org = powa.get('data-org')
+ uuid = powa.get('data-uuid')
+ if org and uuid:
+ entries.append('arcpublishing:%s:%s' % (org, uuid))
+ return entries
+
+ def _real_extract(self, url):
+ org, uuid = re.match(self._VALID_URL, url).groups()
+ for orgs, tmpl in self._POWA_DEFAULTS:
+ if org in orgs:
+ base_api_tmpl = tmpl
+ break
+ else:
+ base_api_tmpl = '%s-prod-cdn.video-api.arcpublishing.com/api'
+ if org == 'wapo':
+ org = 'washpost'
+ video = self._download_json(
+ 'https://%s/v1/ansvideos/findByUuid' % (base_api_tmpl % org),
+ uuid, query={'uuid': uuid})[0]
+ title = video['headlines']['basic']
+ is_live = video.get('status') == 'live'
+
+ urls = []
+ formats = []
+ for s in video.get('streams', []):
+ s_url = s.get('url')
+ if not s_url or s_url in urls:
+ continue
+ urls.append(s_url)
+ stream_type = s.get('stream_type')
+ if stream_type == 'smil':
+ smil_formats = self._extract_smil_formats(
+ s_url, uuid, fatal=False)
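+ # Post-process RTMP formats from the SMIL manifest: split out
+ # app/play_path and convert the fractional tbr to an integer vbr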
+ for f in smil_formats:
+ if f['url'].endswith('/cfx/st'):
+ f['app'] = 'cfx/st'
+ if not f['play_path'].startswith('mp4:'):
+ f['play_path'] = 'mp4:' + f['play_path']
+ if isinstance(f['tbr'], float):
+ f['vbr'] = f['tbr'] * 1000
+ del f['tbr']
+ f['format_id'] = 'rtmp-%d' % f['vbr']
+ formats.extend(smil_formats)
+ elif stream_type in ('ts', 'hls'):
+ m3u8_formats = self._extract_m3u8_formats(
+ s_url, uuid, 'mp4', 'm3u8' if is_live else 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ if all(f.get('acodec') == 'none' for f in m3u8_formats):
+ continue
+ for f in m3u8_formats:
+ if f.get('acodec') == 'none':
+ f['preference'] = -40
+ elif f.get('vcodec') == 'none':
+ f['preference'] = -50
+ height = f.get('height')
+ if not height:
+ continue
+ vbr = self._search_regex(
+ r'[_x]%d[_-](\d+)' % height, f['url'], 'vbr', default=None)
+ if vbr:
+ f['vbr'] = int(vbr)
+ formats.extend(m3u8_formats)
+ else:
+ vbr = int_or_none(s.get('bitrate'))
+ formats.append({
+ 'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type,
+ 'vbr': vbr,
+ 'width': int_or_none(s.get('width')),
+ 'height': int_or_none(s.get('height')),
+ 'filesize': int_or_none(s.get('filesize')),
+ 'url': s_url,
+ 'preference': -1,
+ })
+ self._sort_formats(
+ formats, ('preference', 'width', 'height', 'vbr', 'filesize', 'tbr', 'ext', 'format_id'))
+
+ subtitles = {}
+ for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []):
+ subtitle_url = subtitle.get('url')
+ if subtitle_url:
+ subtitles.setdefault('en', []).append({'url': subtitle_url})
+
+ return {
+ 'id': uuid,
+ 'title': self._live_title(title) if is_live else title,
+ 'thumbnail': try_get(video, lambda x: x['promo_image']['url']),
+ 'description': try_get(video, lambda x: x['subheadlines']['basic']),
+ 'formats': formats,
+ 'duration': int_or_none(video.get('duration'), 100),
+ 'timestamp': parse_iso8601(video.get('created_date')),
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ }
diff --git a/hypervideo_dl/extractor/ard.py b/hypervideo_dl/extractor/ard.py
new file mode 100644
index 0000000..d45a9fe
--- /dev/null
+++ b/hypervideo_dl/extractor/ard.py
@@ -0,0 +1,452 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from .generic import GenericIE
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ qualities,
+ str_or_none,
+ try_get,
+ unified_strdate,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ xpath_text,
+)
+from ..compat import compat_etree_fromstring
+
+
+class ARDMediathekBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['DE']
+
+ def _extract_media_info(self, media_info_url, webpage, video_id):
+ media_info = self._download_json(
+ media_info_url, video_id, 'Downloading media JSON')
+ return self._parse_media_info(media_info, video_id, '"fsk"' in webpage)
+
+ def _parse_media_info(self, media_info, video_id, fsk):
+ formats = self._extract_formats(media_info, video_id)
+
+ if not formats:
+ if fsk:
+ raise ExtractorError(
+ 'This video is only available after 20:00', expected=True)
+ elif media_info.get('_geoblocked'):
+ self.raise_geo_restricted(
+ 'This video is not available due to geoblocking',
+ countries=self._GEO_COUNTRIES)
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ subtitle_url = media_info.get('_subtitleUrl')
+ if subtitle_url:
+ subtitles['de'] = [{
+ 'ext': 'ttml',
+ 'url': subtitle_url,
+ }]
+
+ return {
+ 'id': video_id,
+ 'duration': int_or_none(media_info.get('_duration')),
+ 'thumbnail': media_info.get('_previewImage'),
+ 'is_live': media_info.get('_isLive') is True,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _extract_formats(self, media_info, video_id):
+ type_ = media_info.get('_type')
+ media_array = media_info.get('_mediaArray', [])
+ formats = []
+ for num, media in enumerate(media_array):
+ for stream in media.get('_mediaStreamArray', []):
+ stream_urls = stream.get('_stream')
+ if not stream_urls:
+ continue
+ if not isinstance(stream_urls, list):
+ stream_urls = [stream_urls]
+ quality = stream.get('_quality')
+ server = stream.get('_server')
+ for stream_url in stream_urls:
+ if not url_or_none(stream_url):
+ continue
+ ext = determine_ext(stream_url)
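+ # Adaptive manifests (f4m/m3u8) are only expanded once, from the
+ # 'auto' quality entry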
+ if quality != 'auto' and ext in ('f4m', 'm3u8'):
+ continue
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(stream_url, {
+ 'hdcore': '3.1.1',
+ 'plugin': 'aasp-3.1.1.69.124'
+ }), video_id, f4m_id='hds', fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ if server and server.startswith('rtmp'):
+ f = {
+ 'url': server,
+ 'play_path': stream_url,
+ 'format_id': 'a%s-rtmp-%s' % (num, quality),
+ }
+ else:
+ f = {
+ 'url': stream_url,
+ 'format_id': 'a%s-%s-%s' % (num, ext, quality)
+ }
+ m = re.search(
+ r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
+ stream_url)
+ if m:
+ f.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+ if type_ == 'audio':
+ f['vcodec'] = 'none'
+ formats.append(f)
+ return formats
+
+
+class ARDMediathekIE(ARDMediathekBaseIE):
+ IE_NAME = 'ARD:mediathek'
+ _VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
+
+ _TESTS = [{
+ # available till 26.07.2022
+ 'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
+ 'info_dict': {
+ 'id': '44726822',
+ 'ext': 'mp4',
+ 'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
+ 'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
+ 'duration': 1740,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
+ 'only_matching': True,
+ }, {
+ # audio
+ 'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
+ 'only_matching': True,
+ }, {
+ # audio
+ 'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ # determine video id from url
+ m = re.match(self._VALID_URL, url)
+
+ document_id = None
+
+ numid = re.search(r'documentId=([0-9]+)', url)
+ if numid:
+ document_id = video_id = numid.group(1)
+ else:
+ video_id = m.group('video_id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ ERRORS = (
+ ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
+ ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
+ 'Video %s is no longer available'),
+ )
+
+ for pattern, message in ERRORS:
+ if pattern in webpage:
+ raise ExtractorError(message % video_id, expected=True)
+
+ if re.search(r'[\?&]rss($|[=&])', url):
+ doc = compat_etree_fromstring(webpage.encode('utf-8'))
+ if doc.tag == 'rss':
+ return GenericIE()._extract_rss(url, video_id, doc)
+
+ title = self._og_search_title(webpage, default=None) or self._html_search_regex(
+ [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
+ r'<meta name="dcterms\.title" content="(.*?)"/>',
+ r'<h4 class="headline">(.*?)</h4>',
+ r'<title[^>]*>(.*?)</title>'],
+ webpage, 'title')
+ description = self._og_search_description(webpage, default=None) or self._html_search_meta(
+ 'dcterms.abstract', webpage, 'description', default=None)
+ if description is None:
+ description = self._html_search_meta(
+ 'description', webpage, 'meta description', default=None)
+ if description is None:
+ description = self._html_search_regex(
+ r'<p\s+class="teasertext">(.+?)</p>',
+ webpage, 'teaser text', default=None)
+
+ # Thumbnail is sometimes not present.
+ # It is in the mobile version, but that seems to use a different URL
+ # structure altogether.
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+ media_streams = re.findall(r'''(?x)
+ mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
+ "([^"]+)"''', webpage)
+
+ if media_streams:
+ QUALITIES = qualities(['lo', 'hi', 'hq'])
+ formats = []
+ for furl in set(media_streams):
+ if furl.endswith('.f4m'):
+ fid = 'f4m'
+ else:
+ fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
+ fid = fid_m.group(1) if fid_m else None
+ formats.append({
+ 'quality': QUALITIES(fid),
+ 'format_id': fid,
+ 'url': furl,
+ })
+ self._sort_formats(formats)
+ info = {
+ 'formats': formats,
+ }
+ else: # request JSON file
+ if not document_id:
+ video_id = self._search_regex(
+ r'/play/(?:config|media)/(\d+)', webpage, 'media id')
+ info = self._extract_media_info(
+ 'http://www.ardmediathek.de/play/media/%s' % video_id,
+ webpage, video_id)
+
+ info.update({
+ 'id': video_id,
+ 'title': self._live_title(title) if info.get('is_live') else title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ })
+
+ return info
+
+
+class ARDIE(InfoExtractor):
+ _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
+ _TESTS = [{
+ # available till 7.01.2022
+ 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
+ 'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
+ 'info_dict': {
+ 'id': 'maischberger-die-woche-video100',
+ 'display_id': 'maischberger-die-woche-video100',
+ 'ext': 'mp4',
+ 'duration': 3687.0,
+ 'title': 'maischberger. die woche vom 7. Januar 2021',
+ 'upload_date': '20210107',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('id')
+
+ player_url = mobj.group('mainurl') + '~playerXml.xml'
+ doc = self._download_xml(player_url, display_id)
+ video_node = doc.find('./video')
+ upload_date = unified_strdate(xpath_text(
+ video_node, './broadcastDate'))
+ thumbnail = xpath_text(video_node, './/teaserImage//variant/url')
+
+ formats = []
+ for a in video_node.findall('.//asset'):
+ file_name = xpath_text(a, './fileName', default=None)
+ if not file_name:
+ continue
+ format_type = a.attrib.get('type')
+ format_url = url_or_none(file_name)
+ if format_url:
+ ext = determine_ext(file_name)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_type or 'hls', fatal=False))
+ continue
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(format_url, {'hdcore': '3.7.0'}),
+ display_id, f4m_id=format_type or 'hds', fatal=False))
+ continue
+ f = {
+ 'format_id': format_type,
+ 'width': int_or_none(xpath_text(a, './frameWidth')),
+ 'height': int_or_none(xpath_text(a, './frameHeight')),
+ 'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
+ 'abr': int_or_none(xpath_text(a, './bitrateAudio')),
+ 'vcodec': xpath_text(a, './codecVideo'),
+ 'tbr': int_or_none(xpath_text(a, './totalBitrate')),
+ }
+ server_prefix = xpath_text(a, './serverPrefix', default=None)
+ if server_prefix:
+ f.update({
+ 'url': server_prefix,
+ 'playpath': file_name,
+ })
+ else:
+ if not format_url:
+ continue
+ f['url'] = format_url
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': xpath_text(video_node, './videoId', default=display_id),
+ 'formats': formats,
+ 'display_id': display_id,
+ 'title': video_node.find('./title').text,
+ 'duration': parse_duration(video_node.find('./duration').text),
+ 'upload_date': upload_date,
+ 'thumbnail': thumbnail,
+ }
+
+
+class ARDBetaMediathekIE(ARDMediathekBaseIE):
+ _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?:[^/]+/)?(?:player|live|video)/(?:[^/]+/)*(?P<id>Y3JpZDovL[a-zA-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
+ 'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
+ 'info_dict': {
+ 'display_id': 'die-robuste-roswita',
+ 'id': '78566716',
+ 'title': 'Die robuste Roswita',
+ 'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
+ 'duration': 5316,
+ 'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
+ 'timestamp': 1596658200,
+ 'upload_date': '20200805',
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://ardmediathek.de/ard/video/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/ard/video/trailer/private-eyes-s01-e01/one/Y3JpZDovL3dkci5kZS9CZWl0cmFnLTE1MTgwYzczLWNiMTEtNGNkMS1iMjUyLTg5MGYzOWQxZmQ1YQ/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
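+ # Query ARD's public GraphQL gateway; the returned mediaCollection
+ # mirrors the legacy media JSON handled by _parse_media_info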
+ player_page = self._download_json(
+ 'https://api.ardmediathek.de/public-gateway',
+ video_id, data=json.dumps({
+ 'query': '''{
+ playerPage(client: "ard", clipId: "%s") {
+ blockedByFsk
+ broadcastedOn
+ maturityContentRating
+ mediaCollection {
+ _duration
+ _geoblocked
+ _isLive
+ _mediaArray {
+ _mediaStreamArray {
+ _quality
+ _server
+ _stream
+ }
+ }
+ _previewImage
+ _subtitleUrl
+ _type
+ }
+ show {
+ title
+ }
+ synopsis
+ title
+ tracking {
+ atiCustomVars {
+ contentId
+ }
+ }
+ }
+}''' % video_id,
+ }).encode(), headers={
+ 'Content-Type': 'application/json'
+ })['data']['playerPage']
+ title = player_page['title']
+ content_id = str_or_none(try_get(
+ player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
+ media_collection = player_page.get('mediaCollection') or {}
+ if not media_collection and content_id:
+ media_collection = self._download_json(
+ 'https://www.ardmediathek.de/play/media/' + content_id,
+ content_id, fatal=False) or {}
+ info = self._parse_media_info(
+ media_collection, content_id or video_id,
+ player_page.get('blockedByFsk'))
+ age_limit = None
+ description = player_page.get('synopsis')
+ maturity_content_rating = player_page.get('maturityContentRating')
+ if maturity_content_rating:
+ age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
+ if not age_limit and description:
+ age_limit = int_or_none(self._search_regex(
+ r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
+ info.update({
+ 'age_limit': age_limit,
+ 'title': title,
+ 'description': description,
+ 'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
+ 'series': try_get(player_page, lambda x: x['show']['title']),
+ })
+ return info
diff --git a/hypervideo_dl/extractor/arkena.py b/hypervideo_dl/extractor/arkena.py
new file mode 100644
index 0000000..fd46b1c
--- /dev/null
+++ b/hypervideo_dl/extractor/arkena.py
@@ -0,0 +1,163 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class ArkenaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ video\.(?:arkena|qbrick)\.com/play2/embed/player\?|
+ play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://video.qbrick.com/play2/embed/player?accountId=1034090&mediaId=d8ab4607-00090107-aab86310',
+ 'md5': '97f117754e5f3c020f5f26da4a44ebaf',
+ 'info_dict': {
+ 'id': 'd8ab4607-00090107-aab86310',
+ 'ext': 'mp4',
+ 'title': 'EM_HT20_117_roslund_v2.mp4',
+ 'timestamp': 1608285912,
+ 'upload_date': '20201218',
+ 'duration': 1429.162667,
+ 'subtitles': {
+ 'sv': 'count:3',
+ },
+ },
+ }, {
+ 'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://video.arkena.com/play2/embed/player?accountId=472718&mediaId=35763b3b-00090078-bf604299&pageStyling=styled',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ # See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1',
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ account_id = mobj.group('account_id')
+
+ # Handle http://video.arkena.com/play2/embed/player URL
+ if not video_id:
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = qs.get('mediaId', [None])[0]
+ account_id = qs.get('accountId', [None])[0]
+ if not video_id or not account_id:
+ raise ExtractorError('Invalid URL', expected=True)
+
+ media = self._download_json(
+ 'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id),
+ video_id, query={
+ # https://video.qbrick.com/docs/api/examples/library-api.html
+ 'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags',
+ })
+ metadata = media.get('metadata') or {}
+ title = metadata['title']
+
+ duration = None
+ formats = []
+ thumbnails = []
+ subtitles = {}
+ for resource in media['asset']['resources']:
+ for rendition in (resource.get('renditions') or []):
+ rendition_type = rendition.get('type')
+ for i, link in enumerate(rendition.get('links') or []):
+ href = link.get('href')
+ if not href:
+ continue
+ if rendition_type == 'image':
+ thumbnails.append({
+ 'filesize': int_or_none(rendition.get('size')),
+ 'height': int_or_none(rendition.get('height')),
+ 'id': rendition.get('id'),
+ 'url': href,
+ 'width': int_or_none(rendition.get('width')),
+ })
+ elif rendition_type == 'subtitle':
+ subtitles.setdefault(rendition.get('language') or 'en', []).append({
+ 'url': href,
+ })
+ elif rendition_type == 'video':
+ f = {
+ 'filesize': int_or_none(rendition.get('size')),
+ 'format_id': rendition.get('id'),
+ 'url': href,
+ }
+ video = try_get(rendition, lambda x: x['videos'][i], dict)
+ if video:
+ if not duration:
+ duration = float_or_none(video.get('duration'))
+ f.update({
+ 'height': int_or_none(video.get('height')),
+ 'tbr': int_or_none(video.get('bitrate'), 1000),
+ 'vcodec': video.get('codec'),
+ 'width': int_or_none(video.get('width')),
+ })
+ audio = try_get(video, lambda x: x['audios'][0], dict)
+ if audio:
+ f.update({
+ 'acodec': audio.get('codec'),
+ 'asr': int_or_none(audio.get('sampleRate')),
+ })
+ formats.append(f)
+ elif rendition_type == 'index':
+ mime_type = link.get('mimeType')
+ if mime_type == 'application/smil+xml':
+ formats.extend(self._extract_smil_formats(
+ href, video_id, fatal=False))
+ elif mime_type == 'application/x-mpegURL':
+ formats.extend(self._extract_m3u8_formats(
+ href, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif mime_type == 'application/hds+xml':
+ formats.extend(self._extract_f4m_formats(
+ href, video_id, f4m_id='hds', fatal=False))
+ elif mime_type == 'application/dash+xml':
+ formats.extend(self._extract_mpd_formats(
+ href, video_id, mpd_id='dash', fatal=False))
+ elif mime_type == 'application/vnd.ms-sstr+xml':
+ formats.extend(self._extract_ism_formats(
+ href, video_id, ism_id='mss', fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': metadata.get('description'),
+ 'timestamp': parse_iso8601(media.get('created')),
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ 'duration': duration,
+ 'tags': media.get('tags'),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/arnes.py b/hypervideo_dl/extractor/arnes.py
new file mode 100644
index 0000000..c0032fc
--- /dev/null
+++ b/hypervideo_dl/extractor/arnes.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ remove_start,
+)
+
+
+class ArnesIE(InfoExtractor):
+ IE_NAME = 'video.arnes.si'
+ IE_DESC = 'Arnes Video'
+ _VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
+ _TESTS = [{
+ 'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
+ 'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
+ 'info_dict': {
+ 'id': 'a1qrWTOQfVoU',
+ 'ext': 'mp4',
+ 'title': 'Linearna neodvisnost, definicija',
+ 'description': 'Linearna neodvisnost, definicija',
+ 'license': 'PRIVATE',
+ 'creator': 'Polona Oblak',
+ 'timestamp': 1585063725,
+ 'upload_date': '20200324',
+ 'channel': 'Polona Oblak',
+ 'channel_id': 'q6pc04hw24cj',
+ 'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
+ 'duration': 596.75,
+ 'view_count': int,
+ 'tags': ['linearna_algebra'],
+ 'start_time': 10,
+ }
+ }, {
+ 'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
+ 'only_matching': True,
+ }]
+ _BASE_URL = 'https://video.arnes.si'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
+ title = video['title']
+
+ formats = []
+ for media in (video.get('media') or []):
+ media_url = media.get('url')
+ if not media_url:
+ continue
+ formats.append({
+ 'url': self._BASE_URL + media_url,
+ 'format_id': remove_start(media.get('format'), 'FORMAT_'),
+ 'format_note': media.get('formatTranslation'),
+ 'width': int_or_none(media.get('width')),
+ 'height': int_or_none(media.get('height')),
+ })
+ self._sort_formats(formats)
+
+ channel = video.get('channel') or {}
+ channel_id = channel.get('url')
+ thumbnail = video.get('thumbnailUrl')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': self._BASE_URL + thumbnail if thumbnail else None,
+ 'description': video.get('description'),
+ 'license': video.get('license'),
+ 'creator': video.get('author'),
+ 'timestamp': parse_iso8601(video.get('creationTime')),
+ 'channel': channel.get('name'),
+ 'channel_id': channel_id,
+ 'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
+ 'duration': float_or_none(video.get('duration'), 1000),
+ 'view_count': int_or_none(video.get('views')),
+ 'tags': video.get('hashtags'),
+ 'start_time': int_or_none(compat_parse_qs(
+ compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
+ }
diff --git a/hypervideo_dl/extractor/arte.py b/hypervideo_dl/extractor/arte.py
new file mode 100644
index 0000000..03abdbf
--- /dev/null
+++ b/hypervideo_dl/extractor/arte.py
@@ -0,0 +1,254 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ qualities,
+ try_get,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class ArteTVBaseIE(InfoExtractor):
+ _ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
+ _API_BASE = 'https://api.arte.tv/api/player/v1'
+
+
+class ArteTVIE(ArteTVBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
+ api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
+ )
+ /(?P<id>\d{6}-\d{3}-[AF])
+ ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
+ _TESTS = [{
+ 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
+ 'info_dict': {
+ 'id': '088501-000-A',
+ 'ext': 'mp4',
+ 'title': 'Mexico: Stealing Petrol to Survive',
+ 'upload_date': '20190628',
+ },
+ }, {
+ 'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ lang = mobj.group('lang') or mobj.group('lang_2')
+
+ info = self._download_json(
+ '%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
+ player_info = info['videoJsonPlayer']
+
+ vsr = try_get(player_info, lambda x: x['VSR'], dict)
+ if not vsr:
+ error = None
+ if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
+ error = try_get(
+ player_info, lambda x: x['custom_msg']['msg'], compat_str)
+ if not error:
+ error = 'Video %s is not available' % (player_info.get('VID') or video_id)
+ raise ExtractorError(error, expected=True)
+
+ upload_date_str = player_info.get('shootingDate')
+ if not upload_date_str:
+ upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
+
+ title = (player_info.get('VTI') or player_info['VID']).strip()
+ subtitle = player_info.get('VSU', '').strip()
+ if subtitle:
+ title += ' - %s' % subtitle
+
+ qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
+
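+ # Map site language codes to the letter codes used inside VSR version codes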
+ LANGS = {
+ 'fr': 'F',
+ 'de': 'A',
+ 'en': 'E[ANG]',
+ 'es': 'E[ESP]',
+ 'it': 'E[ITA]',
+ 'pl': 'E[POL]',
+ }
+
+ langcode = LANGS.get(lang, lang)
+
+ formats = []
+ for format_id, format_dict in vsr.items():
+ f = dict(format_dict)
+ format_url = url_or_none(f.get('url'))
+ streamer = f.get('streamer')
+ if not format_url and not streamer:
+ continue
+ versionCode = f.get('versionCode')
+ l = re.escape(langcode)
+
+ # Language preference, from highest to lowest priority
+ # Reference: section 6.8 of
+ # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf
+ PREFERENCES = (
+ # original version in requested language, without subtitles
+ r'VO{0}$'.format(l),
+ # original version in requested language, with partial subtitles in requested language
+ r'VO{0}-ST{0}$'.format(l),
+ # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
+ r'VO{0}-STM{0}$'.format(l),
+ # non-original (dubbed) version in requested language, without subtitles
+ r'V{0}$'.format(l),
+ # non-original (dubbed) version in requested language, with partial subtitles in requested language
+ r'V{0}-ST{0}$'.format(l),
+ # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
+ r'V{0}-STM{0}$'.format(l),
+ # original version in requested language, with partial subtitles in different language
+ r'VO{0}-ST(?!{0}).+?$'.format(l),
+ # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
+ r'VO{0}-STM(?!{0}).+?$'.format(l),
+ # original version in different language, with partial subtitles in requested language
+ r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l),
+ # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
+ r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l),
+ # original version in different language, without subtitles
+ r'VO(?:(?!{0}))?$'.format(l),
+ # original version in different language, with partial subtitles in different language
+ r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l),
+ # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
+ r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l),
+ )
+
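+ # Earlier entries in PREFERENCES are better matches, so invert the index: a better match yields a higher language_preference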
+ for pref, p in enumerate(PREFERENCES):
+ if versionCode and re.match(p, versionCode):
+ lang_pref = len(PREFERENCES) - pref
+ break
+ else:
+ lang_pref = -1
+
+ media_type = f.get('mediaType')
+ if media_type == 'hls':
+ m3u8_formats = self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_id, fatal=False)
+ for m3u8_format in m3u8_formats:
+ m3u8_format['language_preference'] = lang_pref
+ formats.extend(m3u8_formats)
+ continue
+
+ format = {
+ 'format_id': format_id,
+ 'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
+ 'language_preference': lang_pref,
+ 'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'tbr': int_or_none(f.get('bitrate')),
+ 'quality': qfunc(f.get('quality')),
+ }
+
+ if media_type == 'rtmp':
+ format['url'] = f['streamer']
+ format['play_path'] = 'mp4:' + f['url']
+ format['ext'] = 'flv'
+ else:
+ format['url'] = f['url']
+
+ formats.append(format)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': player_info.get('VID') or video_id,
+ 'title': title,
+ 'description': player_info.get('VDE'),
+ 'upload_date': unified_strdate(upload_date_str),
+ 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
+ 'formats': formats,
+ }
+
+
+class ArteTVEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
+ _TESTS = [{
+ 'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
+ 'info_dict': {
+ 'id': '100605-013-A',
+ 'ext': 'mp4',
+ 'title': 'United we Stream November Lockdown Edition #13',
+ 'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
+ 'upload_date': '20201116',
+ },
+ }, {
+ 'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [url for _, url in re.findall(
+ r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ json_url = qs['json_url'][0]
+ video_id = ArteTVIE._match_id(json_url)
+ return self.url_result(
+ json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
+
+
+class ArteTVPlaylistIE(ArteTVBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
+ _TESTS = [{
+ 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
+ 'info_dict': {
+ 'id': 'RC-016954',
+ 'title': 'Earn a Living',
+ 'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
+ },
+ 'playlist_mincount': 6,
+ }, {
+ 'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ lang, playlist_id = re.match(self._VALID_URL, url).groups()
+ collection = self._download_json(
+ '%s/collectionData/%s/%s?source=videos'
+ % (self._API_BASE, lang, playlist_id), playlist_id)
+ entries = []
+ for video in collection['videos']:
+ if not isinstance(video, dict):
+ continue
+ video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
+ if not video_url:
+ continue
+ video_id = video.get('programId')
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': video_url,
+ 'id': video_id,
+ 'title': video.get('title'),
+ 'alt_title': video.get('subtitle'),
+ 'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
+ 'duration': int_or_none(video.get('durationSeconds')),
+ 'view_count': int_or_none(video.get('views')),
+ 'ie_key': ArteTVIE.ie_key(),
+ })
+ title = collection.get('title')
+ description = collection.get('shortDescription') or collection.get('teaserText')
+ return self.playlist_result(entries, playlist_id, title, description)
diff --git a/hypervideo_dl/extractor/asiancrush.py b/hypervideo_dl/extractor/asiancrush.py
new file mode 100644
index 0000000..66ce7c6
--- /dev/null
+++ b/hypervideo_dl/extractor/asiancrush.py
@@ -0,0 +1,200 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import functools
+import re
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ OnDemandPagedList,
+ parse_age_limit,
+ strip_or_none,
+ try_get,
+)
+
+
+class AsianCrushBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
+ _KALTURA_KEYS = [
+ 'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
+ 'widescreen_thumbnail_url', 'screencap_widescreen',
+ ]
+ _API_SUFFIX = {'retrocrush.tv': '-ott'}
+
+ def _call_api(self, host, endpoint, video_id, query, resource):
+ return self._download_json(
+ 'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint), video_id,
+ 'Downloading %s JSON metadata' % resource, query=query,
+ headers=self.geo_verification_headers())['objects']
+
+ def _download_object_data(self, host, object_id, resource):
+ return self._call_api(
+ host, 'search', object_id, {'id': object_id}, resource)[0]
+
+ def _get_object_description(self, obj):
+ return strip_or_none(obj.get('long_description') or obj.get('short_description'))
+
+ def _parse_video_data(self, video):
+ title = video['name']
+
+ entry_id, partner_id = [None] * 2
+ for k in self._KALTURA_KEYS:
+ k_url = video.get(k)
+ if k_url:
+ mobj = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', k_url)
+ if mobj:
+ partner_id, entry_id = mobj.groups()
+ break
+
+ meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
+ categories = list(filter(None, [c.get('name') for c in meta_categories]))
+
+ show_info = video.get('show_info') or {}
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'kaltura:%s:%s' % (partner_id, entry_id),
+ 'ie_key': KalturaIE.ie_key(),
+ 'id': entry_id,
+ 'title': title,
+ 'description': self._get_object_description(video),
+ 'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')),
+ 'categories': categories,
+ 'series': show_info.get('show_name'),
+ 'season_number': int_or_none(show_info.get('season_num')),
+ 'season_id': show_info.get('season_id'),
+ 'episode_number': int_or_none(show_info.get('episode_num')),
+ }
+
+
+class AsianCrushIE(AsianCrushBaseIE):
+ _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
+ _TESTS = [{
+ 'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
+ 'md5': 'c3b740e48d0ba002a42c0b72857beae6',
+ 'info_dict': {
+ 'id': '1_y4tmjm5r',
+ 'ext': 'mp4',
+ 'title': 'Women Who Flirt',
+ 'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
+ 'timestamp': 1496936429,
+ 'upload_date': '20170608',
+ 'uploader_id': 'craig@crifkin.com',
+ 'age_limit': 13,
+ 'categories': 'count:5',
+ 'duration': 5812,
+ },
+ }, {
+ 'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ host, video_id = re.match(self._VALID_URL, url).groups()
+
+ if host == 'cocoro.tv':
+ webpage = self._download_webpage(url, video_id)
+ embed_vars = self._parse_json(self._search_regex(
+ r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
+ default='{}'), video_id, fatal=False) or {}
+ video_id = embed_vars.get('entry_id') or video_id
+
+ video = self._download_object_data(host, video_id, 'video')
+ return self._parse_video_data(video)
+
+
+class AsianCrushPlaylistIE(AsianCrushBaseIE):
+ _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
+ _TESTS = [{
+ 'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
+ 'info_dict': {
+ 'id': '6447',
+ 'title': 'Fruity Samurai',
+ 'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
+ },
+ 'playlist_count': 13,
+ }, {
+ 'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
+ 'only_matching': True,
+ }]
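+ # Effectively unbounded, so a single API call returns every video of a series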
+ _PAGE_SIZE = 1000000000
+
+ def _fetch_page(self, domain, parent_id, page):
+ videos = self._call_api(
+ domain, 'getreferencedobjects', parent_id, {
+ 'max': self._PAGE_SIZE,
+ 'object_type': 'video',
+ 'parent_id': parent_id,
+ 'start': page * self._PAGE_SIZE,
+ }, 'page %d' % (page + 1))
+ for video in videos:
+ yield self._parse_video_data(video)
+
+ def _real_extract(self, url):
+ host, playlist_id = re.match(self._VALID_URL, url).groups()
+
+ if host == 'cocoro.tv':
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = []
+
+ for mobj in re.finditer(
+ r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
+ webpage):
+ attrs = extract_attributes(mobj.group(0))
+ if attrs.get('class') == 'clearfix':
+ entries.append(self.url_result(
+ mobj.group('url'), ie=AsianCrushIE.ie_key()))
+
+ title = self._html_search_regex(
+ r'(?s)<h1\b[^>]*\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
+ 'title', default=None) or self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:title', webpage, 'title',
+ default=None) or self._search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
+ if title:
+ title = re.sub(r'\s*\|\s*.+?$', '', title)
+
+ description = self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:description', webpage, 'description', fatal=False)
+ else:
+ show = self._download_object_data(host, playlist_id, 'show')
+ title = show.get('name')
+ description = self._get_object_description(show)
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, host, playlist_id),
+ self._PAGE_SIZE)
+
+ return self.playlist_result(entries, playlist_id, title, description)
diff --git a/hypervideo_dl/extractor/atresplayer.py b/hypervideo_dl/extractor/atresplayer.py
new file mode 100644
index 0000000..c2cec98
--- /dev/null
+++ b/hypervideo_dl/extractor/atresplayer.py
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ urlencode_postdata,
+)
+
+
+class AtresPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
+ _NETRC_MACHINE = 'atresplayer'
+ _TESTS = [
+ {
+ 'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
+ 'info_dict': {
+ 'id': '5d4aa2c57ed1a88fc715a615',
+ 'ext': 'mp4',
+ 'title': 'Capítulo 7: Asuntos pendientes',
+ 'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
+ 'duration': 3413,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ 'skip': 'This video is only available for registered users'
+ },
+ {
+ 'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
+ 'only_matching': True,
+ },
+ ]
+ _API_BASE = 'https://api.atresplayer.com/'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _handle_error(self, e, code):
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
+ error = self._parse_json(e.cause.read(), None)
+ if error.get('error') == 'required_registered':
+ self.raise_login_required()
+ raise ExtractorError(error['error_description'], expected=True)
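+ # The HTTP error did not match the expected code - re-raise the original exception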
+ raise
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ self._request_webpage(
+ self._API_BASE + 'login', None, 'Downloading login page')
+
+ try:
+ target_url = self._download_json(
+ 'https://account.atresmedia.com/api/login', None,
+ 'Logging in', headers={
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ }, data=urlencode_postdata({
+ 'username': username,
+ 'password': password,
+ }))['targetUrl']
+ except ExtractorError as e:
+ self._handle_error(e, 400)
+
+ self._request_webpage(target_url, None, 'Following Target URL')
+
+ def _real_extract(self, url):
+ display_id, video_id = re.match(self._VALID_URL, url).groups()
+
+ try:
+ episode = self._download_json(
+ self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
+ except ExtractorError as e:
+ self._handle_error(e, 403)
+
+ title = episode['titulo']
+
+ formats = []
+ for source in episode.get('sources', []):
+ src = source.get('src')
+ if not src:
+ continue
+ src_type = source.get('type')
+ if src_type == 'application/vnd.apple.mpegurl':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif src_type == 'application/dash+xml':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
+
+ heartbeat = episode.get('heartbeat') or {}
+ omniture = episode.get('omniture') or {}
+ get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
+
+ return {
+ 'display_id': display_id,
+ 'id': video_id,
+ 'title': title,
+ 'description': episode.get('descripcion'),
+ 'thumbnail': episode.get('imgPoster'),
+ 'duration': int_or_none(episode.get('duration')),
+ 'formats': formats,
+ 'channel': get_meta('channel'),
+ 'season': get_meta('season'),
+ 'episode_number': int_or_none(get_meta('episodeNumber')),
+ }
diff --git a/hypervideo_dl/extractor/atttechchannel.py b/hypervideo_dl/extractor/atttechchannel.py
new file mode 100644
index 0000000..8f93fb3
--- /dev/null
+++ b/hypervideo_dl/extractor/atttechchannel.py
@@ -0,0 +1,55 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class ATTTechChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://techchannel\.att\.com/play-video\.cfm/([^/]+/)*(?P<id>.+)'
+ _TEST = {
+ 'url': 'http://techchannel.att.com/play-video.cfm/2014/1/27/ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
+ 'info_dict': {
+ 'id': '11316',
+ 'display_id': 'ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
+ 'ext': 'flv',
+ 'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use',
+ 'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20140127',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_url = self._search_regex(
+ r"url\s*:\s*'(rtmp://[^']+)'",
+ webpage, 'video URL')
+
+ video_id = self._search_regex(
+ r'mediaid\s*=\s*(\d+)',
+ webpage, 'video id', fatal=False)
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+ upload_date = unified_strdate(self._search_regex(
+ r'[Rr]elease\s+date:\s*(\d{1,2}/\d{1,2}/\d{4})',
+ webpage, 'upload date', fatal=False), False)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'ext': 'flv',
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ }
diff --git a/hypervideo_dl/extractor/atvat.py b/hypervideo_dl/extractor/atvat.py
new file mode 100644
index 0000000..95e572d
--- /dev/null
+++ b/hypervideo_dl/extractor/atvat.py
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ unescapeHTML,
+)
+
+
+class ATVAtIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
+ _TESTS = [{
+ 'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/',
+ 'md5': 'c3b6b975fb3150fc628572939df205f2',
+ 'info_dict': {
+ 'id': '1698447',
+ 'ext': 'mp4',
+ 'title': 'DI, 21.03.17 | 20:05 Uhr 1/1',
+ }
+ }, {
+ 'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_data = self._parse_json(unescapeHTML(self._search_regex(
+ [r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
+ r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
+ webpage, 'player data', group='json')),
+ display_id)['config']['initial_video']
+
+ video_id = video_data['id']
+ video_title = video_data['title']
+
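+ # A broadcast is split into parts; collect each one and return them as a multi_video playlist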
+ parts = []
+ for part in video_data.get('parts', []):
+ part_id = part['id']
+ part_title = part['title']
+
+ formats = []
+ for source in part.get('sources', []):
+ source_url = source.get('src')
+ if not source_url:
+ continue
+ ext = determine_ext(source_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, part_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'format_id': source.get('delivery'),
+ 'url': source_url,
+ })
+ self._sort_formats(formats)
+
+ parts.append({
+ 'id': part_id,
+ 'title': part_title,
+ 'thumbnail': part.get('preview_image_url'),
+ 'duration': int_or_none(part.get('duration')),
+ 'is_live': part.get('is_livestream'),
+ 'formats': formats,
+ })
+
+ return {
+ '_type': 'multi_video',
+ 'id': video_id,
+ 'title': video_title,
+ 'entries': parts,
+ }
diff --git a/hypervideo_dl/extractor/audimedia.py b/hypervideo_dl/extractor/audimedia.py
new file mode 100644
index 0000000..6bd48ef
--- /dev/null
+++ b/hypervideo_dl/extractor/audimedia.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class AudiMediaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?:video/)?(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
+ 'md5': '79a8b71c46d49042609795ab59779b66',
+ 'info_dict': {
+ 'id': '1565',
+ 'ext': 'mp4',
+ 'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
+ 'description': 'md5:60e5d30a78ced725f7b8d34370762941',
+ 'upload_date': '20151124',
+ 'timestamp': 1448354940,
+ 'duration': 74022,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ raw_payload = self._search_regex([
+ r'class="amtv-embed"[^>]+id="([0-9a-z-]+)"',
+ r'id="([0-9a-z-]+)"[^>]+class="amtv-embed"',
+ r'class=\\"amtv-embed\\"[^>]+id=\\"([0-9a-z-]+)\\"',
+ r'id=\\"([0-9a-z-]+)\\"[^>]+class=\\"amtv-embed\\"',
+ r'id=(?:\\)?"(amtve-[a-z]-\d+-[a-z]{2})',
+ ], webpage, 'raw payload')
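+ # The embed id has the form amtve-<stage_mode>-<video_id>-<language>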
+ _, stage_mode, video_id, _ = raw_payload.split('-')
+
+ # TODO: handle s and e stage_mode (live streams and ended live streams)
+ if stage_mode not in ('s', 'e'):
+ video_data = self._download_json(
+ 'https://www.audimedia.tv/api/video/v1/videos/' + video_id,
+ video_id, query={
+ 'embed[]': ['video_versions', 'thumbnail_image'],
+ })['results']
+ formats = []
+
+ stream_url_hls = video_data.get('stream_url_hls')
+ if stream_url_hls:
+ formats.extend(self._extract_m3u8_formats(
+ stream_url_hls, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+
+ stream_url_hds = video_data.get('stream_url_hds')
+ if stream_url_hds:
+ formats.extend(self._extract_f4m_formats(
+ stream_url_hds + '?hdcore=3.4.0',
+ video_id, f4m_id='hds', fatal=False))
+
+ for video_version in video_data.get('video_versions', []):
+ video_version_url = video_version.get('download_url') or video_version.get('stream_url')
+ if not video_version_url:
+ continue
+ f = {
+ 'url': video_version_url,
+ 'width': int_or_none(video_version.get('width')),
+ 'height': int_or_none(video_version.get('height')),
+ 'abr': int_or_none(video_version.get('audio_bitrate')),
+ 'vbr': int_or_none(video_version.get('video_bitrate')),
+ }
+ bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
+ if bitrate:
+ f.update({
+ 'format_id': 'http-%s' % bitrate,
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_data['title'],
+ 'description': video_data.get('subtitle'),
+ 'thumbnail': video_data.get('thumbnail_image', {}).get('file'),
+ 'timestamp': parse_iso8601(video_data.get('publication_date')),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'view_count': int_or_none(video_data.get('view_count')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/audioboom.py b/hypervideo_dl/extractor/audioboom.py
new file mode 100644
index 0000000..c51837b
--- /dev/null
+++ b/hypervideo_dl/extractor/audioboom.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ float_or_none,
+)
+
+
+class AudioBoomIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://audioboom.com/posts/7398103-asim-chaudhry',
+ 'md5': '7b00192e593ff227e6a315486979a42d',
+ 'info_dict': {
+ 'id': '7398103',
+ 'ext': 'mp3',
+ 'title': 'Asim Chaudhry',
+ 'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc',
+ 'duration': 4000.99,
+ 'uploader': 'Sue Perkins: An hour or so with...',
+ 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
+ }
+ }, {
+ 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ clip = None
+
+ clip_store = self._parse_json(
+ self._html_search_regex(
+ r'data-new-clip-store=(["\'])(?P<json>{.+?})\1',
+ webpage, 'clip store', default='{}', group='json'),
+ video_id, fatal=False)
+ if clip_store:
+ clips = clip_store.get('clips')
+ if clips and isinstance(clips, list) and isinstance(clips[0], dict):
+ clip = clips[0]
+
+ def from_clip(field):
+ if clip:
+ return clip.get(field)
+
+ audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
+ 'audio', webpage, 'audio url')
+ title = from_clip('title') or self._html_search_meta(
+ ['og:title', 'og:audio:title', 'audio_title'], webpage)
+ description = from_clip('description') or clean_html(from_clip('formattedDescription')) or self._og_search_description(webpage)
+
+ duration = float_or_none(from_clip('duration') or self._html_search_meta(
+ 'weibo:audio:duration', webpage))
+
+ uploader = from_clip('author') or self._html_search_meta(
+ ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader')
+ uploader_url = from_clip('author_url') or self._html_search_meta(
+ 'audioboo:channel', webpage, 'uploader url')
+
+ return {
+ 'id': video_id,
+ 'url': audio_url,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'uploader_url': uploader_url,
+ }
diff --git a/hypervideo_dl/extractor/audiomack.py b/hypervideo_dl/extractor/audiomack.py
new file mode 100644
index 0000000..cc77713
--- /dev/null
+++ b/hypervideo_dl/extractor/audiomack.py
@@ -0,0 +1,145 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import time
+
+from .common import InfoExtractor
+from .soundcloud import SoundcloudIE
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ url_basename,
+)
+
+
+class AudiomackIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
+ IE_NAME = 'audiomack'
+ _TESTS = [
+ # hosted on audiomack
+ {
+ 'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
+ 'info_dict':
+ {
+ 'id': '310086',
+ 'ext': 'mp3',
+ 'uploader': 'Roosh Williams',
+ 'title': 'Extraordinary'
+ }
+ },
+ # audiomack wrapper around soundcloud song
+ {
+ 'add_ie': ['Soundcloud'],
+ 'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
+ 'info_dict': {
+ 'id': '258901379',
+ 'ext': 'mp3',
+ 'description': 'mamba day freestyle for the legend Kobe Bryant ',
+ 'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
+ 'uploader': 'ILOVEMAKONNEN',
+ 'upload_date': '20160414',
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+ # URLs end with [uploader name]/[track title]; this slug is whatever
+ # the uploader typed in and is rarely the proper song title.
+ # Real metadata is in the API response
+ album_url_tag = self._match_id(url)
+
+ # Request the extended version of the API for extra fields like artist and title
+ api_response = self._download_json(
+ 'http://www.audiomack.com/api/music/url/song/%s?extended=1&_=%d' % (
+ album_url_tag, time.time()),
+ album_url_tag)
+
+ # API is inconsistent with errors
+ if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
+ raise ExtractorError('Invalid url %s' % url)
+
+ # Audiomack wraps a lot of soundcloud tracks in their branded wrapper
+ # if so, pass the work off to the soundcloud extractor
+ if SoundcloudIE.suitable(api_response['url']):
+ return self.url_result(api_response['url'], SoundcloudIE.ie_key())
+
+ return {
+ 'id': compat_str(api_response.get('id', album_url_tag)),
+ 'uploader': api_response.get('artist'),
+ 'title': api_response.get('title'),
+ 'url': api_response['url'],
+ }
+
+
+class AudiomackAlbumIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
+ IE_NAME = 'audiomack:album'
+ _TESTS = [
+ # Standard album playlist
+ {
+ 'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
+ 'playlist_count': 15,
+ 'info_dict':
+ {
+ 'id': '812251',
+ 'title': 'Tha Tour: Part 2 (Official Mixtape)'
+ }
+ },
+ # Album playlist ripped from fakeshoredrive with no metadata
+ {
+ 'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
+ 'info_dict': {
+ 'title': 'PPP (Pistol P Project)',
+ 'id': '837572',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
+ 'id': '837577',
+ 'ext': 'mp3',
+ 'uploader': 'Lil Herb a.k.a. G Herbo',
+ }
+ }],
+ 'params': {
+ 'playliststart': 9,
+ 'playlistend': 9,
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ # URLs end with [uploader name]/[album title]; this slug is whatever
+ # the uploader typed in and is rarely the proper album title.
+ # Real metadata is in the API response
+ album_url_tag = self._match_id(url)
+ result = {'_type': 'playlist', 'entries': []}
+ # There is no single endpoint for album metadata - instead it is included/repeated in each song's metadata
+ # Therefore we don't know how many songs the album has and must loop until the API stops returning songs
+ for track_no in itertools.count():
+ # Get song's metadata
+ api_response = self._download_json(
+ 'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
+ % (album_url_tag, track_no, time.time()), album_url_tag,
+ note='Querying song information (%d)' % (track_no + 1))
+
+ # Total failure, only occurs when url is totally wrong
+ # Won't happen in middle of valid playlist (next case)
+ if 'url' not in api_response or 'error' in api_response:
+ raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
+ # URL is good but song id doesn't exist - usually means end of playlist
+ elif not api_response['url']:
+ break
+ else:
+ # Pull out the album metadata and add to result (if it exists)
+ for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
+ if apikey in api_response and resultkey not in result:
+ result[resultkey] = api_response[apikey]
+ song_id = url_basename(api_response['url']).rpartition('.')[0]
+ result['entries'].append({
+ 'id': compat_str(api_response.get('id', song_id)),
+ 'uploader': api_response.get('artist'),
+ 'title': api_response.get('title', song_id),
+ 'url': api_response['url'],
+ })
+ return result
diff --git a/hypervideo_dl/extractor/awaan.py b/hypervideo_dl/extractor/awaan.py
new file mode 100644
index 0000000..3a7700c
--- /dev/null
+++ b/hypervideo_dl/extractor/awaan.py
@@ -0,0 +1,187 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import base64
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlencode,
+ compat_str,
+)
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ smuggle_url,
+ unsmuggle_url,
+ urlencode_postdata,
+)
+
+
+class AWAANIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
+
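+ # Dispatcher: route to the media, season or show extractor depending on which ids the URL carries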
+ def _real_extract(self, url):
+ show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
+ if video_id and int(video_id) > 0:
+ return self.url_result(
+ 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo')
+ elif season_id and int(season_id) > 0:
+ return self.url_result(smuggle_url(
+ 'http://awaan.ae/program/season/%s' % season_id,
+ {'show_id': show_id}), 'AWAANSeason')
+ else:
+ return self.url_result(
+ 'http://awaan.ae/program/%s' % show_id, 'AWAANSeason')
+
+
+class AWAANBaseIE(InfoExtractor):
+ def _parse_video_data(self, video_data, video_id, is_live):
+ title = video_data.get('title_en') or video_data['title_ar']
+ img = video_data.get('img')
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': video_data.get('description_en') or video_data.get('description_ar'),
+ 'thumbnail': 'http://admin.mangomolo.com/analytics/%s' % img if img else None,
+ 'duration': int_or_none(video_data.get('duration')),
+ 'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
+ 'is_live': is_live,
+ 'uploader_id': video_data.get('user_id'),
+ }
+
+
+class AWAANVideoIE(AWAANBaseIE):
+ IE_NAME = 'awaan:video'
+ _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
+ 'md5': '5f61c33bfc7794315c671a62d43116aa',
+ 'info_dict':
+ {
+ 'id': '17375',
+ 'ext': 'mp4',
+ 'title': 'رحلة العمر : الحلقة 1',
+ 'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
+ 'duration': 2041,
+ 'timestamp': 1227504126,
+ 'upload_date': '20081124',
+ 'uploader_id': '71',
+ },
+ }, {
+ 'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video_data = self._download_json(
+ 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
+ video_id, headers={'Origin': 'http://awaan.ae'})
+ info = self._parse_video_data(video_data, video_id, False)
+
+ embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({
+ 'id': video_data['id'],
+ 'user_id': video_data['user_id'],
+ 'signature': video_data['signature'],
+ 'countries': 'Q0M=',
+ 'filter': 'DENY',
+ })
+ info.update({
+ '_type': 'url_transparent',
+ 'url': embed_url,
+ 'ie_key': 'MangomoloVideo',
+ })
+ return info
+
+
+class AWAANLiveIE(AWAANBaseIE):
+ IE_NAME = 'awaan:live'
+ _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://awaan.ae/live/6/dubai-tv',
+ 'info_dict': {
+ 'id': '6',
+ 'ext': 'mp4',
+ 'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'upload_date': '20150107',
+ 'timestamp': 1420588800,
+ 'uploader_id': '71',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ channel_data = self._download_json(
+ 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id,
+ channel_id, headers={'Origin': 'http://awaan.ae'})
+ info = self._parse_video_data(channel_data, channel_id, True)
+
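+ # The Mangomolo live embed expects base64-encoded user and channel ids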
+ embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({
+ 'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
+ 'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
+ 'signature': channel_data['signature'],
+ 'countries': 'Q0M=',
+ 'filter': 'DENY',
+ })
+ info.update({
+ '_type': 'url_transparent',
+ 'url': embed_url,
+ 'ie_key': 'MangomoloLive',
+ })
+ return info
+
+
+class AWAANSeasonIE(InfoExtractor):
+ IE_NAME = 'awaan:season'
+ _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
+ _TEST = {
+ 'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
+ 'info_dict':
+ {
+ 'id': '7910',
+ 'title': 'محاضرات الشيخ الشعراوي',
+ },
+ 'playlist_mincount': 27,
+ }
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ show_id, season_id = re.match(self._VALID_URL, url).groups()
+
+ data = {}
+ if season_id:
+ data['season'] = season_id
+ show_id = smuggled_data.get('show_id')
+ if show_id is None:
+ season = self._download_json(
+ 'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
+ season_id, headers={'Origin': 'http://awaan.ae'})
+ show_id = season['id']
+ data['show_id'] = show_id
+ show = self._download_json(
+ 'http://admin.mangomolo.com/analytics/index.php/plus/show',
+ show_id, data=urlencode_postdata(data), headers={
+ 'Origin': 'http://awaan.ae',
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+ if not season_id:
+ season_id = show['default_season']
+ for season in show['seasons']:
+ if season['id'] == season_id:
+ title = season.get('title_en') or season['title_ar']
+
+ entries = []
+ for video in show['videos']:
+ video_id = compat_str(video['id'])
+ entries.append(self.url_result(
+ 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id))
+
+ return self.playlist_result(entries, season_id, title)
diff --git a/hypervideo_dl/extractor/aws.py b/hypervideo_dl/extractor/aws.py
new file mode 100644
index 0000000..dccfeaf
--- /dev/null
+++ b/hypervideo_dl/extractor/aws.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+import hashlib
+import hmac
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlencode
+
+
+class AWSIE(InfoExtractor):
+ _AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
+ _AWS_REGION = 'us-east-1'
+
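+ # Subclasses must define _AWS_PROXY_HOST and _AWS_API_KEY and call
+ # _aws_execute_api() with a dict carrying 'uri', 'access_key',
+ # 'secret_key' and, optionally, 'session_token', e.g. (hypothetical values):
+ #   data = self._aws_execute_api({
+ #       'uri': '/prod/media/' + video_id,
+ #       'access_key': token['AccessKeyId'],
+ #       'secret_key': token['SecretKey'],
+ #       'session_token': token['SessionToken'],
+ #   }, video_id)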
+ def _aws_execute_api(self, aws_dict, video_id, query=None):
+ query = query or {}
+ amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
+ date = amz_date[:8]
+ headers = {
+ 'Accept': 'application/json',
+ 'Host': self._AWS_PROXY_HOST,
+ 'X-Amz-Date': amz_date,
+ 'X-Api-Key': self._AWS_API_KEY
+ }
+ session_token = aws_dict.get('session_token')
+ if session_token:
+ headers['X-Amz-Security-Token'] = session_token
+
+ def aws_hash(s):
+ return hashlib.sha256(s.encode('utf-8')).hexdigest()
+
+ # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
+ canonical_querystring = compat_urllib_parse_urlencode(query)
+ canonical_headers = ''
+ for header_name, header_value in sorted(headers.items()):
+ canonical_headers += '%s:%s\n' % (header_name.lower(), header_value)
+ signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())])
+ canonical_request = '\n'.join([
+ 'GET',
+ aws_dict['uri'],
+ canonical_querystring,
+ canonical_headers,
+ signed_headers,
+ aws_hash('')
+ ])
+
+ # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
+ credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request']
+ credential_scope = '/'.join(credential_scope_list)
+ string_to_sign = '\n'.join([self._AWS_ALGORITHM, amz_date, credential_scope, aws_hash(canonical_request)])
+
+ # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
+ def aws_hmac(key, msg):
+ return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
+
+ def aws_hmac_digest(key, msg):
+ return aws_hmac(key, msg).digest()
+
+ def aws_hmac_hexdigest(key, msg):
+ return aws_hmac(key, msg).hexdigest()
+
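+ # Derive the signing key by chaining HMACs over each credential scope component (date, region, service, terminator)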
+ k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
+ for value in credential_scope_list:
+ k_signing = aws_hmac_digest(k_signing, value)
+
+ signature = aws_hmac_hexdigest(k_signing, string_to_sign)
+
+ # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
+ headers['Authorization'] = ', '.join([
+ '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
+ 'SignedHeaders=%s' % signed_headers,
+ 'Signature=%s' % signature,
+ ])
+
+ return self._download_json(
+ 'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
+ video_id, headers=headers)
diff --git a/hypervideo_dl/extractor/azmedien.py b/hypervideo_dl/extractor/azmedien.py
new file mode 100644
index 0000000..9302669
--- /dev/null
+++ b/hypervideo_dl/extractor/azmedien.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+
+
+class AZMedienIE(InfoExtractor):
+ IE_DESC = 'AZ Medien videos'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?P<host>
+ telezueri\.ch|
+ telebaern\.tv|
+ telem1\.ch
+ )/
+ [^/]+/
+ (?P<id>
+ [^/]+-(?P<article_id>\d+)
+ )
+ (?:
+ \#video=
+ (?P<kaltura_id>
+ [_0-9a-z]+
+ )
+ )?
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
+ 'info_dict': {
+ 'id': '1_anruz3wy',
+ 'ext': 'mp4',
+ 'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
+ 'uploader_id': 'TVOnline',
+ 'upload_date': '20180930',
+ 'timestamp': 1538328802,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
+ 'only_matching': True
+ }]
+ _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
+ _PARTNER_ID = '1719221'
+
+ def _real_extract(self, url):
+ host, display_id, article_id, entry_id = re.match(self._VALID_URL, url).groups()
+
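+ # Without a #video= fragment, resolve the Kaltura entry id through the article API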
+ if not entry_id:
+ entry_id = self._download_json(
+ self._API_TEMPL % (host, host.split('.')[0]), display_id, query={
+ 'variables': json.dumps({
+ 'contextId': 'NewsArticle:' + article_id,
+ }),
+ })['data']['context']['mainAsset']['video']['kaltura']['kalturaId']
+
+ return self.url_result(
+ 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
+ ie=KalturaIE.ie_key(), video_id=entry_id)
diff --git a/hypervideo_dl/extractor/baidu.py b/hypervideo_dl/extractor/baidu.py
new file mode 100644
index 0000000..234a661
--- /dev/null
+++ b/hypervideo_dl/extractor/baidu.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unescapeHTML
+
+
+class BaiduVideoIE(InfoExtractor):
+ IE_DESC = '百度视频'
+ _VALID_URL = r'https?://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
+ _TESTS = [{
+ 'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
+ 'info_dict': {
+ 'id': '1069',
+ 'title': '中华小当家 TV版国语',
+ 'description': 'md5:51be07afe461cf99fa61231421b5397c',
+ },
+ 'playlist_count': 52,
+ }, {
+ 'url': 'http://v.baidu.com/show/11595.htm?frp=bdbrand',
+ 'info_dict': {
+ 'id': '11595',
+ 'title': 're:^奔跑吧兄弟',
+ 'description': 'md5:1bf88bad6d850930f542d51547c089b8',
+ },
+ 'playlist_mincount': 12,
+ }]
+
+ def _call_api(self, path, category, playlist_id, note):
+ return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % (
+ path, category, playlist_id), playlist_id, note)
+
+ def _real_extract(self, url):
+ category, playlist_id = re.match(self._VALID_URL, url).groups()
+ if category == 'show':
+ category = 'tvshow'
+ if category == 'tv':
+ category = 'tvplay'
+
+ playlist_detail = self._call_api(
+ 'xqinfo', category, playlist_id, 'Download playlist JSON metadata')
+
+ playlist_title = playlist_detail['title']
+ playlist_description = unescapeHTML(playlist_detail.get('intro'))
+
+ episodes_detail = self._call_api(
+ 'xqsingle', category, playlist_id, 'Download episodes JSON metadata')
+
+ entries = [self.url_result(
+ episode['url'], video_title=episode['title']
+ ) for episode in episodes_detail['videos']]
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
diff --git a/hypervideo_dl/extractor/bandaichannel.py b/hypervideo_dl/extractor/bandaichannel.py
new file mode 100644
index 0000000..d672859
--- /dev/null
+++ b/hypervideo_dl/extractor/bandaichannel.py
@@ -0,0 +1,37 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .brightcove import BrightcoveNewIE
+from ..utils import extract_attributes
+
+
+class BandaiChannelIE(BrightcoveNewIE):
+ IE_NAME = 'bandaichannel'
+ _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)'
+ _TESTS = [{
+ 'url': 'https://www.b-ch.com/titles/514/001',
+ 'md5': 'a0f2d787baa5729bed71108257f613a4',
+ 'info_dict': {
+ 'id': '6128044564001',
+ 'ext': 'mp4',
+ 'title': 'メタルファイターMIKU 第1話',
+ 'timestamp': 1580354056,
+ 'uploader_id': '5797077852001',
+ 'upload_date': '20200130',
+ 'duration': 1387.733,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ attrs = extract_attributes(self._search_regex(
+ r'(<video-js[^>]+\bid="bcplayer"[^>]*>)', webpage, 'player'))
+ bc = self._download_json(
+ 'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'],
+ video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc']
+ return self._parse_brightcove_metadata(bc, bc['id'])
diff --git a/hypervideo_dl/extractor/bandcamp.py b/hypervideo_dl/extractor/bandcamp.py
new file mode 100644
index 0000000..dbe57c7
--- /dev/null
+++ b/hypervideo_dl/extractor/bandcamp.py
@@ -0,0 +1,391 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ KNOWN_EXTENSIONS,
+ parse_filesize,
+ str_or_none,
+ try_get,
+ update_url_query,
+ unified_strdate,
+ unified_timestamp,
+ url_or_none,
+ urljoin,
+)
+
+
+class BandcampIE(InfoExtractor):
+ _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
+ 'md5': 'c557841d5e50261777a6585648adf439',
+ 'info_dict': {
+ 'id': '1812978515',
+ 'ext': 'mp3',
+ 'title': "hypervideo \"'/\\ä↭ - hypervideo \"'/\\ä↭ - hypervideo test song \"'/\\ä↭",
+ 'duration': 9.8485,
+ 'uploader': 'hypervideo "\'/\\ä↭',
+ 'upload_date': '20121129',
+ 'timestamp': 1354224127,
+ },
+ '_skip': 'There is a limit of 200 free downloads / month for the test song'
+ }, {
+ # free download
+ 'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
+ 'info_dict': {
+ 'id': '2650410135',
+ 'ext': 'aiff',
+ 'title': 'Ben Prunty - Lanius (Battle)',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Ben Prunty',
+ 'timestamp': 1396508491,
+ 'upload_date': '20140403',
+ 'release_timestamp': 1396483200,
+ 'release_date': '20140403',
+ 'duration': 260.877,
+ 'track': 'Lanius (Battle)',
+ 'track_number': 1,
+ 'track_id': '2650410135',
+ 'artist': 'Ben Prunty',
+ 'album': 'FTL: Advanced Edition Soundtrack',
+ },
+ }, {
+ # no free download, mp3 128
+ 'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire',
+ 'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7',
+ 'info_dict': {
+ 'id': '2584466013',
+ 'ext': 'mp3',
+ 'title': 'Mastodon - Hail to Fire',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Mastodon',
+ 'timestamp': 1322005399,
+ 'upload_date': '20111122',
+ 'release_timestamp': 1076112000,
+ 'release_date': '20040207',
+ 'duration': 120.79,
+ 'track': 'Hail to Fire',
+ 'track_number': 5,
+ 'track_id': '2584466013',
+ 'artist': 'Mastodon',
+ 'album': 'Call of the Mastodon',
+ },
+ }]
+
+ def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
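+ # Bandcamp pages embed JSON blobs in data-* attributes (data-tralbum, data-embed, data-blob)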
+ return self._parse_json(self._html_search_regex(
+ r'data-%s=(["\'])({.+?})\1' % attr, webpage,
+ attr + ' data', group=2), video_id, fatal=fatal)
+
+ def _real_extract(self, url):
+ title = self._match_id(url)
+ webpage = self._download_webpage(url, title)
+ tralbum = self._extract_data_attr(webpage, title)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ track_id = None
+ track = None
+ track_number = None
+ duration = None
+
+ formats = []
+ track_info = try_get(tralbum, lambda x: x['trackinfo'][0], dict)
+ if track_info:
+ file_ = track_info.get('file')
+ if isinstance(file_, dict):
+ for format_id, format_url in file_.items():
+ if not url_or_none(format_url):
+ continue
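+                    # streaming format ids look like 'mp3-128'
+                    # (codec-bitrate), so the split yields ext='mp3' and
+                    # abr_str='128'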
+ ext, abr_str = format_id.split('-', 1)
+ formats.append({
+ 'format_id': format_id,
+ 'url': self._proto_relative_url(format_url, 'http:'),
+ 'ext': ext,
+ 'vcodec': 'none',
+ 'acodec': ext,
+ 'abr': int_or_none(abr_str),
+ })
+ track = track_info.get('title')
+ track_id = str_or_none(
+ track_info.get('track_id') or track_info.get('id'))
+ track_number = int_or_none(track_info.get('track_num'))
+ duration = float_or_none(track_info.get('duration'))
+
+ embed = self._extract_data_attr(webpage, title, 'embed', False)
+ current = tralbum.get('current') or {}
+ artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
+ timestamp = unified_timestamp(
+ current.get('publish_date') or tralbum.get('album_publish_date'))
+
+ download_link = tralbum.get('freeDownloadPage')
+ if download_link:
+ track_id = compat_str(tralbum['id'])
+
+ download_webpage = self._download_webpage(
+ download_link, track_id, 'Downloading free downloads page')
+
+ blob = self._extract_data_attr(download_webpage, track_id, 'blob')
+
+ info = try_get(
+ blob, (lambda x: x['digital_items'][0],
+ lambda x: x['download_items'][0]), dict)
+ if info:
+ downloads = info.get('downloads')
+ if isinstance(downloads, dict):
+ if not track:
+ track = info.get('title')
+ if not artist:
+ artist = info.get('artist')
+ if not thumbnail:
+ thumbnail = info.get('thumb_url')
+
+ download_formats = {}
+ download_formats_list = blob.get('download_formats')
+ if isinstance(download_formats_list, list):
+                        for f in download_formats_list:
+ name, ext = f.get('name'), f.get('file_extension')
+ if all(isinstance(x, compat_str) for x in (name, ext)):
+ download_formats[name] = ext.strip('.')
+
+ for format_id, f in downloads.items():
+ format_url = f.get('url')
+ if not format_url:
+ continue
+ # Stat URL generation algorithm is reverse engineered from
+ # download_*_bundle_*.js
+ stat_url = update_url_query(
+ format_url.replace('/download/', '/statdownload/'), {
+ '.rand': int(time.time() * 1000 * random.random()),
+ })
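+                    # e.g. .../download/track?id=123 becomes
+                    # .../statdownload/track?id=123&.rand=456789 (paths
+                    # illustrative; only the /download/ -> /statdownload/
+                    # swap and the .rand cache buster matter)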
+ format_id = f.get('encoding_name') or format_id
+ stat = self._download_json(
+ stat_url, track_id, 'Downloading %s JSON' % format_id,
+ transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
+ fatal=False)
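+                    # the statdownload response is not bare JSON, so
+                    # transform_source slices out the object between the
+                    # first '{' and the last '}'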
+ if not stat:
+ continue
+ retry_url = url_or_none(stat.get('retry_url'))
+ if not retry_url:
+ continue
+ formats.append({
+ 'url': self._proto_relative_url(retry_url, 'http:'),
+ 'ext': download_formats.get(format_id),
+ 'format_id': format_id,
+ 'format_note': f.get('description'),
+ 'filesize': parse_filesize(f.get('size_mb')),
+ 'vcodec': 'none',
+ })
+
+ self._sort_formats(formats)
+
+ title = '%s - %s' % (artist, track) if artist else track
+
+ if not duration:
+ duration = float_or_none(self._html_search_meta(
+ 'duration', webpage, default=None))
+
+ return {
+ 'id': track_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'uploader': artist,
+ 'timestamp': timestamp,
+ 'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
+ 'duration': duration,
+ 'track': track,
+ 'track_number': track_number,
+ 'track_id': track_id,
+ 'artist': artist,
+ 'album': embed.get('album_title'),
+ 'formats': formats,
+ }
+
+
+class BandcampAlbumIE(BandcampIE):
+ IE_NAME = 'Bandcamp:album'
+ _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'
+
+ _TESTS = [{
+ 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
+ 'playlist': [
+ {
+ 'md5': '39bc1eded3476e927c724321ddf116cf',
+ 'info_dict': {
+ 'id': '1353101989',
+ 'ext': 'mp3',
+ 'title': 'Blazo - Intro',
+ 'timestamp': 1311756226,
+ 'upload_date': '20110727',
+ 'uploader': 'Blazo',
+ }
+ },
+ {
+ 'md5': '1a2c32e2691474643e912cc6cd4bffaa',
+ 'info_dict': {
+ 'id': '38097443',
+ 'ext': 'mp3',
+ 'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)',
+ 'timestamp': 1311757238,
+ 'upload_date': '20110727',
+ 'uploader': 'Blazo',
+ }
+ },
+ ],
+ 'info_dict': {
+ 'title': 'Jazz Format Mixtape vol.1',
+ 'id': 'jazz-format-mixtape-vol-1',
+ 'uploader_id': 'blazo',
+ },
+ 'params': {
+ 'playlistend': 2
+ },
+ 'skip': 'Bandcamp imposes download limits.'
+ }, {
+ 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
+ 'info_dict': {
+ 'title': 'Hierophany of the Open Grave',
+ 'uploader_id': 'nightbringer',
+ 'id': 'hierophany-of-the-open-grave',
+ },
+ 'playlist_mincount': 9,
+ }, {
+ 'url': 'http://dotscale.bandcamp.com',
+ 'info_dict': {
+ 'title': 'Loom',
+ 'id': 'dotscale',
+ 'uploader_id': 'dotscale',
+ },
+ 'playlist_mincount': 7,
+ }, {
+ # with escaped quote in title
+ 'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
+ 'info_dict': {
+ 'title': '"Entropy" EP',
+ 'uploader_id': 'jstrecords',
+ 'id': 'entropy-ep',
+ 'description': 'md5:0ff22959c943622972596062f2f366a5',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ # not all tracks have songs
+ 'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
+ 'info_dict': {
+ 'id': 'we-are-the-plague',
+ 'title': 'WE ARE THE PLAGUE',
+ 'uploader_id': 'insulters',
+ 'description': 'md5:b3cf845ee41b2b1141dc7bde9237255f',
+ },
+ 'playlist_count': 2,
+ }]
+
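+    # the album regex leaves the /album/ path optional, so it would also
+    # match plain track and weekly URLs; defer to those extractors first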
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
+ else super(BandcampAlbumIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ uploader_id, album_id = re.match(self._VALID_URL, url).groups()
+ playlist_id = album_id or uploader_id
+ webpage = self._download_webpage(url, playlist_id)
+ tralbum = self._extract_data_attr(webpage, playlist_id)
+ track_info = tralbum.get('trackinfo')
+ if not track_info:
+ raise ExtractorError('The page doesn\'t contain any tracks')
+ # Only tracks with duration info have songs
+ entries = [
+ self.url_result(
+ urljoin(url, t['title_link']), BandcampIE.ie_key(),
+ str_or_none(t.get('track_id') or t.get('id')), t.get('title'))
+ for t in track_info
+ if t.get('duration')]
+
+ current = tralbum.get('current') or {}
+
+ return {
+ '_type': 'playlist',
+ 'uploader_id': uploader_id,
+ 'id': playlist_id,
+ 'title': current.get('title'),
+ 'description': current.get('about'),
+ 'entries': entries,
+ }
+
+
+class BandcampWeeklyIE(BandcampIE):
+ IE_NAME = 'Bandcamp:weekly'
+ _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://bandcamp.com/?show=224',
+ 'md5': 'b00df799c733cf7e0c567ed187dea0fd',
+ 'info_dict': {
+ 'id': '224',
+ 'ext': 'opus',
+ 'title': 'BC Weekly April 4th 2017 - Magic Moments',
+ 'description': 'md5:5d48150916e8e02d030623a48512c874',
+ 'duration': 5829.77,
+ 'release_date': '20170404',
+ 'series': 'Bandcamp Weekly',
+ 'episode': 'Magic Moments',
+ 'episode_id': '224',
+ },
+ 'params': {
+ 'format': 'opus-lo',
+ },
+ }, {
+ 'url': 'https://bandcamp.com/?blah/blah@&show=228',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ webpage = self._download_webpage(url, show_id)
+
+ blob = self._extract_data_attr(webpage, show_id, 'blob')
+
+ show = blob['bcw_data'][show_id]
+
+ formats = []
+ for format_id, format_url in show['audio_stream'].items():
+ if not url_or_none(format_url):
+ continue
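+            # for/else: ext stays None only when no known extension
+            # substring matches the format_id (e.g. 'opus' in 'opus-lo')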
+ for known_ext in KNOWN_EXTENSIONS:
+ if known_ext in format_id:
+ ext = known_ext
+ break
+ else:
+ ext = None
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'ext': ext,
+ 'vcodec': 'none',
+ })
+ self._sort_formats(formats)
+
+ title = show.get('audio_title') or 'Bandcamp Weekly'
+ subtitle = show.get('subtitle')
+ if subtitle:
+ title += ' - %s' % subtitle
+
+ return {
+ 'id': show_id,
+ 'title': title,
+ 'description': show.get('desc') or show.get('short_desc'),
+ 'duration': float_or_none(show.get('audio_duration')),
+ 'is_live': False,
+ 'release_date': unified_strdate(show.get('published_date')),
+ 'series': 'Bandcamp Weekly',
+ 'episode': show.get('subtitle'),
+ 'episode_id': show_id,
+ 'formats': formats
+ }
diff --git a/hypervideo_dl/extractor/bbc.py b/hypervideo_dl/extractor/bbc.py
new file mode 100644
index 0000000..247d982
--- /dev/null
+++ b/hypervideo_dl/extractor/bbc.py
@@ -0,0 +1,1623 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import functools
+import itertools
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_etree_Element,
+ compat_HTTPError,
+ compat_parse_qs,
+ compat_str,
+ compat_urllib_parse_urlparse,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ clean_html,
+ dict_get,
+ float_or_none,
+ get_element_by_class,
+ int_or_none,
+ js_to_json,
+ parse_duration,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+ unescapeHTML,
+ unified_timestamp,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class BBCCoUkIE(InfoExtractor):
+ IE_NAME = 'bbc.co.uk'
+ IE_DESC = 'BBC iPlayer'
+ _ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?bbc\.co\.uk/
+ (?:
+ programmes/(?!articles/)|
+ iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
+ music/(?:clips|audiovideo/popular)[/#]|
+ radio/player/|
+ sounds/play/|
+ events/[^/]+/play/[^/]+/
+ )
+ (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
+ ''' % _ID_REGEX
+
+ _LOGIN_URL = 'https://account.bbc.com/signin'
+ _NETRC_MACHINE = 'bbc'
+
+ _MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
+ _MEDIA_SETS = [
+        # Provides HQ HLS streams with even better quality than the pc
+        # mediaset, but sometimes fails with a geolocation error even when the
+        # programme is not geo-restricted at all (e.g.
+        # http://www.bbc.co.uk/programmes/b06bp7lf). May also fail with
+        # selectionunavailable.
+ 'iptv-all',
+ 'pc',
+ ]
+
+ _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
+ 'info_dict': {
+ 'id': 'b039d07m',
+ 'ext': 'flv',
+ 'title': 'Kaleidoscope, Leonard Cohen',
+ 'description': 'The Canadian poet and songwriter reflects on his musical career.',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ },
+ {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
+ 'info_dict': {
+ 'id': 'b00yng1d',
+ 'ext': 'flv',
+ 'title': 'The Man in Black: Series 3: The Printed Name',
+ 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
+ 'duration': 1800,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Episode is no longer available on BBC iPlayer Radio',
+ },
+ {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
+ 'info_dict': {
+ 'id': 'b00yng1d',
+ 'ext': 'flv',
+ 'title': 'The Voice UK: Series 3: Blind Auditions 5',
+ 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
+ 'duration': 5100,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
+ },
+ {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
+ 'info_dict': {
+ 'id': 'b03k3pb7',
+ 'ext': 'flv',
+ 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
+ 'description': '2. Invasion',
+ 'duration': 3600,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
+ 'info_dict': {
+ 'id': 'b04v209v',
+ 'ext': 'flv',
+ 'title': 'Pete Tong, The Essential New Tune Special',
+ 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
+ 'duration': 10800,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Episode is no longer available on BBC iPlayer Radio',
+ }, {
+ 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
+ 'note': 'Audio',
+ 'info_dict': {
+ 'id': 'p022h44j',
+ 'ext': 'flv',
+ 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
+ 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
+ 'duration': 227,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
+ 'note': 'Video',
+ 'info_dict': {
+ 'id': 'p025c103',
+ 'ext': 'flv',
+ 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
+ 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
+ 'duration': 226,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
+ 'info_dict': {
+ 'id': 'p02n76xf',
+ 'ext': 'flv',
+ 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
+ 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
+ 'duration': 3540,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'geolocation',
+ }, {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
+ 'info_dict': {
+ 'id': 'b05zmgw1',
+ 'ext': 'flv',
+ 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
+ 'title': 'Royal Academy Summer Exhibition',
+ 'duration': 3540,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'geolocation',
+ }, {
+        # the iptv-all mediaset fails with a geolocation error even though
+        # this programme is not geo-restricted at all
+ 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
+ 'info_dict': {
+ 'id': 'b06rkms3',
+ 'ext': 'flv',
+ 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
+ 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Now it\'s really geo-restricted',
+ }, {
+ # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
+ 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
+ 'info_dict': {
+ 'id': 'p028bfkj',
+ 'ext': 'flv',
+ 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
+ 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
+ 'note': 'Audio',
+ 'info_dict': {
+ 'id': 'm0007jz9',
+ 'ext': 'mp4',
+ 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
+ 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
+ 'duration': 9840,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bbc.co.uk/programmes/m00005xn',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
+ 'only_matching': True,
+ }]
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading signin page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'username': username,
+ 'password': password,
+ })
+
+ post_url = urljoin(self._LOGIN_URL, self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+ 'post url', default=self._LOGIN_URL, group='url'))
+
+ response, urlh = self._download_webpage_handle(
+ post_url, None, 'Logging in', data=urlencode_postdata(login_form),
+ headers={'Referer': self._LOGIN_URL})
+
+ if self._LOGIN_URL in urlh.geturl():
+ error = clean_html(get_element_by_class('form-message', response))
+ if error:
+ raise ExtractorError(
+ 'Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+ def _real_initialize(self):
+ self._login()
+
+ class MediaSelectionError(Exception):
+ def __init__(self, id):
+ self.id = id
+
+ def _extract_asx_playlist(self, connection, programme_id):
+ asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
+ return [ref.get('href') for ref in asx.findall('./Entry/ref')]
+
+ def _extract_items(self, playlist):
+ return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
+
+ def _extract_medias(self, media_selection):
+ error = media_selection.get('result')
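+        # a failing selector response is e.g. {"result": "geolocation"};
+        # a successful one carries a "media" list instead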
+ if error:
+ raise BBCCoUkIE.MediaSelectionError(error)
+ return media_selection.get('media') or []
+
+ def _extract_connections(self, media):
+ return media.get('connection') or []
+
+ def _get_subtitles(self, media, programme_id):
+ subtitles = {}
+ for connection in self._extract_connections(media):
+ cc_url = url_or_none(connection.get('href'))
+ if not cc_url:
+ continue
+ captions = self._download_xml(
+ cc_url, programme_id, 'Downloading captions', fatal=False)
+ if not isinstance(captions, compat_etree_Element):
+ continue
+ subtitles['en'] = [
+ {
+ 'url': connection.get('href'),
+ 'ext': 'ttml',
+ },
+ ]
+ break
+ return subtitles
+
+ def _raise_extractor_error(self, media_selection_error):
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, media_selection_error.id),
+ expected=True)
+
+ def _download_media_selector(self, programme_id):
+ last_exception = None
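+        # try each media set in order, remembering geo/availability errors
+        # so the most relevant one is raised if every set fails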
+ for media_set in self._MEDIA_SETS:
+ try:
+ return self._download_media_selector_url(
+ self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
+ except BBCCoUkIE.MediaSelectionError as e:
+ if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
+ last_exception = e
+ continue
+ self._raise_extractor_error(e)
+ self._raise_extractor_error(last_exception)
+
+ def _download_media_selector_url(self, url, programme_id=None):
+ media_selection = self._download_json(
+ url, programme_id, 'Downloading media selection JSON',
+ expected_status=(403, 404))
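+        # the selector reports errors with 403/404 while still returning
+        # parseable JSON, so those statuses are accepted rather than raised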
+ return self._process_media_selector(media_selection, programme_id)
+
+ def _process_media_selector(self, media_selection, programme_id):
+ formats = []
+ subtitles = None
+ urls = []
+
+ for media in self._extract_medias(media_selection):
+ kind = media.get('kind')
+ if kind in ('video', 'audio'):
+ bitrate = int_or_none(media.get('bitrate'))
+ encoding = media.get('encoding')
+ width = int_or_none(media.get('width'))
+ height = int_or_none(media.get('height'))
+ file_size = int_or_none(media.get('media_file_size'))
+ for connection in self._extract_connections(media):
+ href = connection.get('href')
+ if href in urls:
+ continue
+ if href:
+ urls.append(href)
+ conn_kind = connection.get('kind')
+ protocol = connection.get('protocol')
+ supplier = connection.get('supplier')
+ transfer_format = connection.get('transferFormat')
+ format_id = supplier or conn_kind or protocol
+ # ASX playlist
+ if supplier == 'asx':
+ for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
+ formats.append({
+ 'url': ref,
+ 'format_id': 'ref%s_%s' % (i, format_id),
+ })
+ elif transfer_format == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ href, programme_id, mpd_id=format_id, fatal=False))
+ elif transfer_format == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ href, programme_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_id, fatal=False))
+ elif transfer_format == 'hds':
+ formats.extend(self._extract_f4m_formats(
+ href, programme_id, f4m_id=format_id, fatal=False))
+ else:
+ if not supplier and bitrate:
+ format_id += '-%d' % bitrate
+ fmt = {
+ 'format_id': format_id,
+ 'filesize': file_size,
+ }
+ if kind == 'video':
+ fmt.update({
+ 'width': width,
+ 'height': height,
+ 'tbr': bitrate,
+ 'vcodec': encoding,
+ })
+ else:
+ fmt.update({
+ 'abr': bitrate,
+ 'acodec': encoding,
+ 'vcodec': 'none',
+ })
+ if protocol in ('http', 'https'):
+ # Direct link
+ fmt.update({
+ 'url': href,
+ })
+ elif protocol == 'rtmp':
+ application = connection.get('application', 'ondemand')
+ auth_string = connection.get('authString')
+ identifier = connection.get('identifier')
+ server = connection.get('server')
+ fmt.update({
+ 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
+ 'play_path': identifier,
+ 'app': '%s?%s' % (application, auth_string),
+ 'page_url': 'http://www.bbc.co.uk',
+ 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
+ 'rtmp_live': False,
+ 'ext': 'flv',
+ })
+ else:
+ continue
+ formats.append(fmt)
+ elif kind == 'captions':
+ subtitles = self.extract_subtitles(media, programme_id)
+ return formats, subtitles
+
+ def _download_playlist(self, playlist_id):
+ try:
+ playlist = self._download_json(
+ 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
+ playlist_id, 'Downloading playlist JSON')
+
+ version = playlist.get('defaultAvailableVersion')
+ if version:
+ smp_config = version['smpConfig']
+ title = smp_config['title']
+ description = smp_config['summary']
+ for item in smp_config['items']:
+ kind = item['kind']
+ if kind not in ('programme', 'radioProgramme'):
+ continue
+ programme_id = item.get('vpid')
+ duration = int_or_none(item.get('duration'))
+ formats, subtitles = self._download_media_selector(programme_id)
+ return programme_id, title, description, duration, formats, subtitles
+ except ExtractorError as ee:
+ if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
+ raise
+
+ # fallback to legacy playlist
+ return self._process_legacy_playlist(playlist_id)
+
+ def _process_legacy_playlist_url(self, url, display_id):
+ playlist = self._download_legacy_playlist_url(url, display_id)
+ return self._extract_from_legacy_playlist(playlist, display_id)
+
+ def _process_legacy_playlist(self, playlist_id):
+ return self._process_legacy_playlist_url(
+ 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)
+
+ def _download_legacy_playlist_url(self, url, playlist_id=None):
+ return self._download_xml(
+ url, playlist_id, 'Downloading legacy playlist XML')
+
+ def _extract_from_legacy_playlist(self, playlist, playlist_id):
+ no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS)
+ if no_items is not None:
+ reason = no_items.get('reason')
+ if reason == 'preAvailability':
+ msg = 'Episode %s is not yet available' % playlist_id
+ elif reason == 'postAvailability':
+ msg = 'Episode %s is no longer available' % playlist_id
+ elif reason == 'noMedia':
+ msg = 'Episode %s is not currently available' % playlist_id
+ else:
+ msg = 'Episode %s is not available: %s' % (playlist_id, reason)
+ raise ExtractorError(msg, expected=True)
+
+ for item in self._extract_items(playlist):
+ kind = item.get('kind')
+ if kind not in ('programme', 'radioProgramme'):
+ continue
+ title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
+ description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
+ description = description_el.text if description_el is not None else None
+
+ def get_programme_id(item):
+ def get_from_attributes(item):
+ for p in ('identifier', 'group'):
+ value = item.get(p)
+ if value and re.match(r'^[pb][\da-z]{7}$', value):
+ return value
+            # use the item's own attributes first, then fall back to its mediator
+            programme_id = get_from_attributes(item)
+            if programme_id:
+                return programme_id
+ mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS)
+ if mediator is not None:
+ return get_from_attributes(mediator)
+
+ programme_id = get_programme_id(item)
+ duration = int_or_none(item.get('duration'))
+
+ if programme_id:
+ formats, subtitles = self._download_media_selector(programme_id)
+ else:
+ formats, subtitles = self._process_media_selector(item, playlist_id)
+ programme_id = playlist_id
+
+ return programme_id, title, description, duration, formats, subtitles
+
+ def _real_extract(self, url):
+ group_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, group_id, 'Downloading video page')
+
+ error = self._search_regex(
+ r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
+ webpage, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
+ programme_id = None
+ duration = None
+
+ tviplayer = self._search_regex(
+ r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
+ webpage, 'player', default=None)
+
+ if tviplayer:
+ player = self._parse_json(tviplayer, group_id).get('player', {})
+ duration = int_or_none(player.get('duration'))
+ programme_id = player.get('vpid')
+
+ if not programme_id:
+ programme_id = self._search_regex(
+ r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None)
+
+ if programme_id:
+ formats, subtitles = self._download_media_selector(programme_id)
+ title = self._og_search_title(webpage, default=None) or self._html_search_regex(
+ (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
+ r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
+ description = self._search_regex(
+ (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
+ r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
+ webpage, 'description', default=None)
+ if not description:
+ description = self._html_search_meta('description', webpage)
+ else:
+ programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class BBCIE(BBCCoUkIE):
+ IE_NAME = 'bbc'
+ IE_DESC = 'BBC'
+ _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
+
+ _MEDIA_SETS = [
+ 'mobile-tablet-main',
+ 'pc',
+ ]
+
+ _TESTS = [{
+ # article with multiple videos embedded with data-playable containing vpids
+ 'url': 'http://www.bbc.com/news/world-europe-32668511',
+ 'info_dict': {
+ 'id': 'world-europe-32668511',
+ 'title': 'Russia stages massive WW2 parade',
+ 'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
+ },
+ 'playlist_count': 2,
+ }, {
+ # article with multiple videos embedded with data-playable (more videos)
+ 'url': 'http://www.bbc.com/news/business-28299555',
+ 'info_dict': {
+ 'id': 'business-28299555',
+ 'title': 'Farnborough Airshow: Video highlights',
+ 'description': 'BBC reports and video highlights at the Farnborough Airshow.',
+ },
+ 'playlist_count': 9,
+ 'skip': 'Save time',
+ }, {
+ # article with multiple videos embedded with `new SMP()`
+ # broken
+ 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
+ 'info_dict': {
+ 'id': '3662a707-0af9-3149-963f-47bea720b460',
+ 'title': 'BUGGER',
+ },
+ 'playlist_count': 18,
+ }, {
+ # single video embedded with data-playable containing vpid
+ 'url': 'http://www.bbc.com/news/world-europe-32041533',
+ 'info_dict': {
+ 'id': 'p02mprgb',
+ 'ext': 'mp4',
+ 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
+ 'description': 'md5:2868290467291b37feda7863f7a83f54',
+ 'duration': 47,
+ 'timestamp': 1427219242,
+ 'upload_date': '20150324',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ # article with single video embedded with data-playable containing XML playlist
+ # with direct video links as progressiveDownloadUrl (for now these are extracted)
+ # and playlist with f4m and m3u8 as streamingUrl
+ 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
+ 'info_dict': {
+ 'id': '150615_telabyad_kentin_cogu',
+ 'ext': 'mp4',
+ 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
+ 'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
+ 'timestamp': 1434397334,
+ 'upload_date': '20150615',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # single video embedded with data-playable containing XML playlists (regional section)
+ 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
+ 'info_dict': {
+ 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
+ 'ext': 'mp4',
+ 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
+ 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
+ 'timestamp': 1434713142,
+ 'upload_date': '20150619',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # single video from video playlist embedded with vxp-playlist-data JSON
+ 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
+ 'info_dict': {
+ 'id': 'p02w6qjc',
+ 'ext': 'mp4',
+ 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
+ 'duration': 56,
+ 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # single video story with digitalData
+ 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
+ 'info_dict': {
+ 'id': 'p02q6gc4',
+ 'ext': 'flv',
+ 'title': 'Sri Lanka’s spicy secret',
+ 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
+ 'timestamp': 1437674293,
+ 'upload_date': '20150723',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ # single video story without digitalData
+ 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
+ 'info_dict': {
+ 'id': 'p018zqqg',
+ 'ext': 'mp4',
+ 'title': 'Hyundai Santa Fe Sport: Rock star',
+ 'description': 'md5:b042a26142c4154a6e472933cf20793d',
+ 'timestamp': 1415867444,
+ 'upload_date': '20141113',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ # single video embedded with Morph
+ 'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
+ 'info_dict': {
+ 'id': 'p041vhd0',
+ 'ext': 'mp4',
+ 'title': "Nigeria v Japan - Men's First Round",
+ 'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
+ 'duration': 7980,
+ 'uploader': 'BBC Sport',
+ 'uploader_id': 'bbc_sport',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Georestricted to UK',
+ }, {
+ # single video with playlist.sxml URL in playlist param
+ 'url': 'http://www.bbc.com/sport/0/football/33653409',
+ 'info_dict': {
+ 'id': 'p02xycnp',
+ 'ext': 'mp4',
+ 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
+ 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
+ 'duration': 140,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ # article with multiple videos embedded with playlist.sxml in playlist param
+ 'url': 'http://www.bbc.com/sport/0/football/34475836',
+ 'info_dict': {
+ 'id': '34475836',
+ 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
+ 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
+ },
+ 'playlist_count': 3,
+ }, {
+ # school report article with single video
+ 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
+ 'info_dict': {
+ 'id': '35744779',
+ 'title': 'School which breaks down barriers in Jerusalem',
+ },
+ 'playlist_count': 1,
+ }, {
+ # single video with playlist URL from weather section
+ 'url': 'http://www.bbc.com/weather/features/33601775',
+ 'only_matching': True,
+ }, {
+ # custom redirection to www.bbc.com
+ # also, video with window.__INITIAL_DATA__
+ 'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
+ 'info_dict': {
+ 'id': 'p02xzws1',
+ 'ext': 'mp4',
+ 'title': "Pluto may have 'nitrogen glaciers'",
+ 'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
+ 'thumbnail': r're:https?://.+/.+\.jpg',
+ 'timestamp': 1437785037,
+ 'upload_date': '20150725',
+ },
+ }, {
+ # single video article embedded with data-media-vpid
+ 'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
+ 'info_dict': {
+ 'id': 'p06556y7',
+ 'ext': 'mp4',
+ 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
+ 'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # window.__PRELOADED_STATE__
+ 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
+ 'info_dict': {
+ 'id': 'b0b9z4vz',
+ 'ext': 'mp4',
+ 'title': 'Prom 6: An American in Paris and Turangalila',
+ 'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
+ 'uploader': 'Radio 3',
+ 'uploader_id': 'bbc_radio_three',
+ },
+ }, {
+ 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
+ 'info_dict': {
+ 'id': 'p06w9tws',
+ 'ext': 'mp4',
+ 'title': 'md5:2fabf12a726603193a2879a055f72514',
+ 'description': 'Learn English words and phrases from this story',
+ },
+ 'add_ie': [BBCCoUkIE.ie_key()],
+ }, {
+ # BBC Reel
+ 'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
+ 'info_dict': {
+ 'id': 'p07c6sb9',
+ 'ext': 'mp4',
+ 'title': 'How positive thinking is harming your happiness',
+ 'alt_title': 'The downsides of positive thinking',
+ 'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
+ 'duration': 235,
+ 'thumbnail': r're:https?://.+/p07c9dsr.jpg',
+ 'upload_date': '20190604',
+ 'categories': ['Psychology'],
+ },
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
+ return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
+ else super(BBCIE, cls).suitable(url))
+
+ def _extract_from_media_meta(self, media_meta, video_id):
+ # Direct links to media in media metadata (e.g.
+ # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
+ # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
+ source_files = media_meta.get('sourceFiles')
+ if source_files:
+ return [{
+ 'url': f['url'],
+ 'format_id': format_id,
+ 'ext': f.get('encoding'),
+ 'tbr': float_or_none(f.get('bitrate'), 1000),
+ 'filesize': int_or_none(f.get('filesize')),
+ } for format_id, f in source_files.items() if f.get('url')], []
+
+ programme_id = media_meta.get('externalId')
+ if programme_id:
+ return self._download_media_selector(programme_id)
+
+ # Process playlist.sxml as legacy playlist
+ href = media_meta.get('href')
+ if href:
+ playlist = self._download_legacy_playlist_url(href)
+ _, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id)
+ return formats, subtitles
+
+ return [], []
+
+ def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
+ programme_id, title, description, duration, formats, subtitles = \
+ self._process_legacy_playlist_url(url, playlist_id)
+ self._sort_formats(formats)
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
+ timestamp = json_ld_info.get('timestamp')
+
+ playlist_title = json_ld_info.get('title')
+ if not playlist_title:
+ playlist_title = self._og_search_title(
+ webpage, default=None) or self._html_search_regex(
+ r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
+ if playlist_title:
+ playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
+
+ playlist_description = json_ld_info.get(
+ 'description') or self._og_search_description(webpage, default=None)
+
+ if not timestamp:
+ timestamp = parse_iso8601(self._search_regex(
+ [r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
+ r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
+ r'"datePublished":\s*"([^"]+)'],
+ webpage, 'date', default=None))
+
+ entries = []
+
+ # article with multiple videos embedded with playlist.sxml (e.g.
+ # http://www.bbc.com/sport/0/football/34475836)
+ playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
+ playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
+ if playlists:
+ entries = [
+ self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
+ for playlist_url in playlists]
+
+ # news article with multiple videos embedded with data-playable
+ data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
+ if data_playables:
+ for _, data_playable_json in data_playables:
+ data_playable = self._parse_json(
+ unescapeHTML(data_playable_json), playlist_id, fatal=False)
+ if not data_playable:
+ continue
+ settings = data_playable.get('settings', {})
+ if settings:
+ # data-playable with video vpid in settings.playlistObject.items (e.g.
+ # http://www.bbc.com/news/world-us-canada-34473351)
+ playlist_object = settings.get('playlistObject', {})
+ if playlist_object:
+ items = playlist_object.get('items')
+ if items and isinstance(items, list):
+ title = playlist_object['title']
+ description = playlist_object.get('summary')
+ duration = int_or_none(items[0].get('duration'))
+ programme_id = items[0].get('vpid')
+ formats, subtitles = self._download_media_selector(programme_id)
+ self._sort_formats(formats)
+ entries.append({
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ else:
+ # data-playable without vpid but with a playlist.sxml URLs
+ # in otherSettings.playlist (e.g.
+ # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
+ playlist = data_playable.get('otherSettings', {}).get('playlist', {})
+ if playlist:
+ entry = None
+ for key in ('streaming', 'progressiveDownload'):
+ playlist_url = playlist.get('%sUrl' % key)
+ if not playlist_url:
+ continue
+ try:
+ info = self._extract_from_playlist_sxml(
+ playlist_url, playlist_id, timestamp)
+ if not entry:
+ entry = info
+ else:
+ entry['title'] = info['title']
+ entry['formats'].extend(info['formats'])
+ except ExtractorError as e:
+ # Some playlist URL may fail with 500, at the same time
+ # the other one may work fine (e.g.
+ # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
+ continue
+ raise
+ if entry:
+ self._sort_formats(entry['formats'])
+ entries.append(entry)
+
+ if entries:
+ return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
+
+ # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
+ group_id = self._search_regex(
+ r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
+ webpage, 'group id', default=None)
+ if group_id:
+ return self.url_result(
+ 'https://www.bbc.co.uk/programmes/%s' % group_id,
+ ie=BBCCoUkIE.ie_key())
+
+ # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
+ programme_id = self._search_regex(
+ [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
+ r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
+ r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
+ webpage, 'vpid', default=None)
+
+ if programme_id:
+ formats, subtitles = self._download_media_selector(programme_id)
+ self._sort_formats(formats)
+ # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
+ digital_data = self._parse_json(
+ self._search_regex(
+ r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
+ programme_id, fatal=False)
+ page_info = digital_data.get('page', {}).get('pageInfo', {})
+ title = page_info.get('pageName') or self._og_search_title(webpage)
+ description = page_info.get('description') or self._og_search_description(webpage)
+ timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
+ initial_data = self._parse_json(self._html_search_regex(
+ r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
+ webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
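+        # the (?:(?!\2).)+ idiom matches up to (but not including) whichever
+        # quote opened the attribute, so both data-json='...' and
+        # data-json="..." are handled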
+ if initial_data:
+ init_data = try_get(
+ initial_data, lambda x: x['initData']['items'][0], dict) or {}
+ smp_data = init_data.get('smpData') or {}
+ clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
+ version_id = clip_data.get('versionID')
+ if version_id:
+ title = smp_data['title']
+ formats, subtitles = self._download_media_selector(version_id)
+ self._sort_formats(formats)
+ image_url = smp_data.get('holdingImageURL')
+ display_date = init_data.get('displayDate')
+ topic_title = init_data.get('topicTitle')
+
+ return {
+ 'id': version_id,
+ 'title': title,
+ 'formats': formats,
+ 'alt_title': init_data.get('shortTitle'),
+ 'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
+ 'description': smp_data.get('summary') or init_data.get('shortSummary'),
+ 'upload_date': display_date.replace('-', '') if display_date else None,
+ 'subtitles': subtitles,
+ 'duration': int_or_none(clip_data.get('duration')),
+ 'categories': [topic_title] if topic_title else None,
+ }
+
+ # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
+        # Several setPayload calls may be present, but the video always seems
+        # to be related to the first one
+ morph_payload = self._parse_json(
+ self._search_regex(
+ r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
+ webpage, 'morph payload', default='{}'),
+ playlist_id, fatal=False)
+ if morph_payload:
+ components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
+ for component in components:
+ if not isinstance(component, dict):
+ continue
+ lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
+ if not lead_media:
+ continue
+ identifiers = lead_media.get('identifiers')
+ if not identifiers or not isinstance(identifiers, dict):
+ continue
+ programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
+ if not programme_id:
+ continue
+ title = lead_media.get('title') or self._og_search_title(webpage)
+ formats, subtitles = self._download_media_selector(programme_id)
+ self._sort_formats(formats)
+ description = lead_media.get('summary')
+ uploader = lead_media.get('masterBrand')
+ uploader_id = lead_media.get('mid')
+ duration = None
+ duration_d = lead_media.get('duration')
+ if isinstance(duration_d, dict):
+ duration = parse_duration(dict_get(
+ duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ preload_state = self._parse_json(self._search_regex(
+ r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
+ 'preload state', default='{}'), playlist_id, fatal=False)
+ if preload_state:
+ current_programme = preload_state.get('programmes', {}).get('current') or {}
+ programme_id = current_programme.get('id')
+ if current_programme and programme_id and current_programme.get('type') == 'playable_item':
+ title = current_programme.get('titles', {}).get('tertiary') or playlist_title
+ formats, subtitles = self._download_media_selector(programme_id)
+ self._sort_formats(formats)
+ synopses = current_programme.get('synopses') or {}
+ network = current_programme.get('network') or {}
+ duration = int_or_none(
+ current_programme.get('duration', {}).get('value'))
+ thumbnail = None
+ image_url = current_programme.get('image_url')
+ if image_url:
+ thumbnail = image_url.replace('{recipe}', 'raw')
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': dict_get(synopses, ('long', 'medium', 'short')),
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'uploader': network.get('short_title'),
+ 'uploader_id': network.get('id'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ bbc3_config = self._parse_json(
+ self._search_regex(
+ r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
+ 'bbcthree config', default='{}'),
+ playlist_id, transform_source=js_to_json, fatal=False) or {}
+ payload = bbc3_config.get('payload') or {}
+ if payload:
+ clip = payload.get('currentClip') or {}
+ clip_vpid = clip.get('vpid')
+ clip_title = clip.get('title')
+ if clip_vpid and clip_title:
+ formats, subtitles = self._download_media_selector(clip_vpid)
+ self._sort_formats(formats)
+ return {
+ 'id': clip_vpid,
+ 'title': clip_title,
+ 'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
+ 'description': clip.get('description'),
+ 'duration': parse_duration(clip.get('duration')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+ bbc3_playlist = try_get(
+ payload, lambda x: x['content']['bbcMedia']['playlist'],
+ dict)
+ if bbc3_playlist:
+ playlist_title = bbc3_playlist.get('title') or playlist_title
+ thumbnail = bbc3_playlist.get('holdingImageURL')
+ entries = []
+ for bbc3_item in bbc3_playlist['items']:
+ programme_id = bbc3_item.get('versionID')
+ if not programme_id:
+ continue
+ formats, subtitles = self._download_media_selector(programme_id)
+ self._sort_formats(formats)
+ entries.append({
+ 'id': programme_id,
+ 'title': playlist_title,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
+ initial_data = self._parse_json(self._search_regex(
+ r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
+            'initial data', default='{}'), playlist_id, fatal=False)
+ if initial_data:
+ def parse_media(media):
+ if not media:
+ return
+ for item in (try_get(media, lambda x: x['media']['items'], list) or []):
+ item_id = item.get('id')
+ item_title = item.get('title')
+ if not (item_id and item_title):
+ continue
+ formats, subtitles = self._download_media_selector(item_id)
+ self._sort_formats(formats)
+ item_desc = None
+ blocks = try_get(media, lambda x: x['summary']['blocks'], list)
+ if blocks:
+ summary = []
+ for block in blocks:
+ text = try_get(block, lambda x: x['model']['text'], compat_str)
+ if text:
+ summary.append(text)
+ if summary:
+ item_desc = '\n\n'.join(summary)
+ item_time = None
+ for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
+ if try_get(meta, lambda x: x['label']) == 'Published':
+ item_time = unified_timestamp(meta.get('timestamp'))
+ break
+ entries.append({
+ 'id': item_id,
+ 'title': item_title,
+ 'thumbnail': item.get('holdingImageUrl'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'timestamp': item_time,
+ 'description': strip_or_none(item_desc),
+ })
+ for resp in (initial_data.get('data') or {}).values():
+ name = resp.get('name')
+ if name == 'media-experience':
+ parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
+ elif name == 'article':
+ for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
+ if block.get('type') != 'media':
+ continue
+ parse_media(block.get('model'))
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
+ def extract_all(pattern):
+ return list(filter(None, map(
+ lambda s: self._parse_json(s, playlist_id, fatal=False),
+ re.findall(pattern, webpage))))
+
+ # Multiple video article (e.g.
+ # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
+ EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
+ entries = []
+ for match in extract_all(r'new\s+SMP\(({.+?})\)'):
+ embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
+ if embed_url and re.match(EMBED_URL, embed_url):
+ entries.append(embed_url)
+ entries.extend(re.findall(
+ r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
+ if entries:
+ return self.playlist_result(
+ [self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
+ playlist_id, playlist_title, playlist_description)
+
+ # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
+ medias = extract_all(r"data-media-meta='({[^']+})'")
+
+ if not medias:
+ # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
+ media_asset = self._search_regex(
+ r'mediaAssetPage\.init\(\s*({.+?}), "/',
+ webpage, 'media asset', default=None)
+ if media_asset:
+ media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
+ medias = []
+ for video in media_asset_page.get('videos', {}).values():
+ medias.extend(video.values())
+
+ if not medias:
+ # Multiple video playlist with single `now playing` entry (e.g.
+ # http://www.bbc.com/news/video_and_audio/must_see/33767813)
+ vxp_playlist = self._parse_json(
+ self._search_regex(
+ r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
+ webpage, 'playlist data'),
+ playlist_id)
+ playlist_medias = []
+ for item in vxp_playlist:
+ media = item.get('media')
+ if not media:
+ continue
+ playlist_medias.append(media)
+            # Download a single video if we find media whose asset id matches
+            # the video id from the URL
+ if item.get('advert', {}).get('assetId') == playlist_id:
+ medias = [media]
+ break
+ # Fallback to the whole playlist
+ if not medias:
+ medias = playlist_medias
+
+ entries = []
+ for num, media_meta in enumerate(medias, start=1):
+ formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
+ if not formats:
+ continue
+ self._sort_formats(formats)
+
+ video_id = media_meta.get('externalId')
+ if not video_id:
+ video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
+
+ title = media_meta.get('caption')
+ if not title:
+ title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
+
+ duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
+
+ images = []
+ for image in media_meta.get('images', {}).values():
+ images.extend(image.values())
+ if 'image' in media_meta:
+ images.append(media_meta['image'])
+
+ thumbnails = [{
+ 'url': image.get('href'),
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ } for image in images]
+
+ entries.append({
+ 'id': video_id,
+ 'title': title,
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+
+ return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
+
+
+class BBCCoUkArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
+ IE_NAME = 'bbc.co.uk:article'
+ IE_DESC = 'BBC articles'
+
+ _TEST = {
+ 'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
+ 'info_dict': {
+ 'id': '3jNQLTMrPlYGTBn0WV6M2MS',
+ 'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
+ 'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
+ },
+ 'playlist_count': 4,
+ 'add_ie': ['BBCCoUk'],
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage).strip()
+
+ entries = [self.url_result(programme_url) for programme_url in re.findall(
+ r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]
+
+ return self.playlist_result(entries, playlist_id, title, description)
+
+
+class BBCCoUkPlaylistBaseIE(InfoExtractor):
+ def _entries(self, webpage, url, playlist_id):
+ single_page = 'page' in compat_urlparse.parse_qs(
+ compat_urlparse.urlparse(url).query)
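+        # an explicit ?page=N in the URL pins extraction to that single
+        # page; otherwise follow the 'next page' links until they run out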
+ for page_num in itertools.count(2):
+ for video_id in re.findall(
+ self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
+ yield self.url_result(
+ self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
+ if single_page:
+ return
+ next_page = self._search_regex(
+ r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
+ webpage, 'next page url', default=None, group='url')
+ if not next_page:
+ break
+ webpage = self._download_webpage(
+ compat_urlparse.urljoin(url, next_page), playlist_id,
+ 'Downloading page %d' % page_num, page_num)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ title, description = self._extract_title_and_description(webpage)
+
+ return self.playlist_result(
+ self._entries(webpage, url, playlist_id),
+ playlist_id, title, description)
+
+
+class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
+ _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
+
+ @staticmethod
+ def _get_default(episode, key, default_key='default'):
+ return try_get(episode, lambda x: x[key][default_key])
+
+ def _get_description(self, data):
+ synopsis = data.get(self._DESCRIPTION_KEY) or {}
+ return dict_get(synopsis, ('large', 'medium', 'small'))
+
+ def _fetch_page(self, programme_id, per_page, series_id, page):
+ elements = self._get_elements(self._call_api(
+ programme_id, per_page, page + 1, series_id))
+ for element in elements:
+ episode = self._get_episode(element)
+ episode_id = episode.get('id')
+ if not episode_id:
+ continue
+ thumbnail = None
+ image = self._get_episode_image(episode)
+ if image:
+ thumbnail = image.replace('{recipe}', 'raw')
+ category = self._get_default(episode, 'labels', 'category')
+ yield {
+ '_type': 'url',
+ 'id': episode_id,
+ 'title': self._get_episode_field(episode, 'subtitle'),
+ 'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
+ 'thumbnail': thumbnail,
+ 'description': self._get_description(episode),
+ 'categories': [category] if category else None,
+ 'series': self._get_episode_field(episode, 'title'),
+ 'ie_key': BBCCoUkIE.ie_key(),
+ }
+
+ def _real_extract(self, url):
+ pid = self._match_id(url)
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ series_id = qs.get('seriesId', [None])[0]
+ page = qs.get('page', [None])[0]
+ per_page = 36 if page else self._PAGE_SIZE
+ fetch_page = functools.partial(self._fetch_page, pid, per_page, series_id)
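+        # an explicit ?page=N fetches just that page; otherwise
+        # OnDemandPagedList calls fetch_page lazily as entries are consumed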
+ entries = fetch_page(int(page) - 1) if page else OnDemandPagedList(fetch_page, self._PAGE_SIZE)
+ playlist_data = self._get_playlist_data(self._call_api(pid, 1))
+ return self.playlist_result(
+ entries, pid, self._get_playlist_title(playlist_data),
+ self._get_description(playlist_data))
+
+
+class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
+ IE_NAME = 'bbc.co.uk:iplayer:episodes'
+ _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
+ _TESTS = [{
+ 'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
+ 'info_dict': {
+ 'id': 'b05rcz9v',
+ 'title': 'The Disappearance',
+ 'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
+ },
+ 'playlist_mincount': 8,
+ }, {
+ # all seasons
+ 'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
+ 'info_dict': {
+ 'id': 'b094m5t9',
+ 'title': 'Doctor Foster',
+ 'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ # explicit season
+ 'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
+ 'info_dict': {
+ 'id': 'b094m5t9',
+ 'title': 'Doctor Foster',
+ 'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ # all pages
+ 'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
+ 'info_dict': {
+ 'id': 'm0004c4v',
+ 'title': 'Beechgrove',
+ 'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
+ },
+ 'playlist_mincount': 37,
+ }, {
+ # explicit page
+ 'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
+ 'info_dict': {
+ 'id': 'm0004c4v',
+ 'title': 'Beechgrove',
+ 'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
+ },
+ 'playlist_mincount': 1,
+ }]
+ _PAGE_SIZE = 100
+ _DESCRIPTION_KEY = 'synopsis'
+
+ def _get_episode_image(self, episode):
+ return self._get_default(episode, 'image')
+
+ def _get_episode_field(self, episode, field):
+ return self._get_default(episode, field)
+
+ @staticmethod
+ def _get_elements(data):
+ return data['entities']['results']
+
+ @staticmethod
+ def _get_episode(element):
+ return element.get('episode') or {}
+
+ def _call_api(self, pid, per_page, page=1, series_id=None):
+ variables = {
+ 'id': pid,
+ 'page': page,
+ 'perPage': per_page,
+ }
+ if series_id:
+ variables['sliceId'] = series_id
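+        # POST a fixed query id plus variables to the iPlayer GraphQL
+        # backend (ibl) instead of a full query document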
+ return self._download_json(
+ 'https://graph.ibl.api.bbc.co.uk/', pid, headers={
+ 'Content-Type': 'application/json'
+ }, data=json.dumps({
+ 'id': '5692d93d5aac8d796a0305e895e61551',
+ 'variables': variables,
+ }).encode('utf-8'))['data']['programme']
+
+ @staticmethod
+ def _get_playlist_data(data):
+ return data
+
+ def _get_playlist_title(self, data):
+ return self._get_default(data, 'title')
+
+
+class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
+ IE_NAME = 'bbc.co.uk:iplayer:group'
+ _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
+ _TESTS = [{
+        # Available for over a year, unlike the 30 days typical of most other programmes
+ 'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
+ 'info_dict': {
+ 'id': 'p02tcc32',
+ 'title': 'Bohemian Icons',
+ 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ # all pages
+ 'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
+ 'info_dict': {
+ 'id': 'p081d7j7',
+ 'title': 'Music in Scotland',
+ 'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
+ },
+ 'playlist_mincount': 47,
+ }, {
+ # explicit page
+ 'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
+ 'info_dict': {
+ 'id': 'p081d7j7',
+ 'title': 'Music in Scotland',
+ 'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
+ },
+ 'playlist_mincount': 11,
+ }]
+ _PAGE_SIZE = 200
+ _DESCRIPTION_KEY = 'synopses'
+
+ def _get_episode_image(self, episode):
+ return self._get_default(episode, 'images', 'standard')
+
+ def _get_episode_field(self, episode, field):
+ return episode.get(field)
+
+ @staticmethod
+ def _get_elements(data):
+ return data['elements']
+
+ @staticmethod
+ def _get_episode(element):
+ return element
+
+ def _call_api(self, pid, per_page, page=1, series_id=None):
+ return self._download_json(
+ 'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
+ pid, query={
+ 'page': page,
+ 'per_page': per_page,
+ })['group_episodes']
+
+ @staticmethod
+ def _get_playlist_data(data):
+ return data['group']
+
+ def _get_playlist_title(self, data):
+ return data.get('title')
+
+
+class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
+ IE_NAME = 'bbc.co.uk:playlist'
+ _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
+ _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
+ _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
+ _TESTS = [{
+ 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
+ 'info_dict': {
+ 'id': 'b05rcz9v',
+ 'title': 'The Disappearance - Clips - BBC Four',
+ 'description': 'French thriller serial about a missing teenager.',
+ },
+ 'playlist_mincount': 7,
+ }, {
+ # multipage playlist, explicit page
+ 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
+ 'info_dict': {
+ 'id': 'b00mfl7n',
+ 'title': 'Frozen Planet - Clips - BBC One',
+ 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
+ },
+ 'playlist_mincount': 24,
+ }, {
+ # multipage playlist, all pages
+ 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
+ 'info_dict': {
+ 'id': 'b00mfl7n',
+ 'title': 'Frozen Planet - Clips - BBC One',
+ 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
+ },
+ 'playlist_mincount': 142,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
+ 'only_matching': True,
+ }]
+
+ def _extract_title_and_description(self, webpage):
+ title = self._og_search_title(webpage, fatal=False)
+ description = self._og_search_description(webpage)
+ return title, description
diff --git a/hypervideo_dl/extractor/beatport.py b/hypervideo_dl/extractor/beatport.py
new file mode 100644
index 0000000..e607094
--- /dev/null
+++ b/hypervideo_dl/extractor/beatport.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class BeatportIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.|pro\.)?beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://beatport.com/track/synesthesia-original-mix/5379371',
+ 'md5': 'b3c34d8639a2f6a7f734382358478887',
+ 'info_dict': {
+ 'id': '5379371',
+ 'display_id': 'synesthesia-original-mix',
+ 'ext': 'mp4',
+ 'title': 'Froxic - Synesthesia (Original Mix)',
+ },
+ }, {
+ 'url': 'https://beatport.com/track/love-and-war-original-mix/3756896',
+ 'md5': 'e44c3025dfa38c6577fbaeb43da43514',
+ 'info_dict': {
+ 'id': '3756896',
+ 'display_id': 'love-and-war-original-mix',
+ 'ext': 'mp3',
+ 'title': 'Wolfgang Gartner - Love & War (Original Mix)',
+ },
+ }, {
+ 'url': 'https://beatport.com/track/birds-original-mix/4991738',
+ 'md5': 'a1fd8e8046de3950fd039304c186c05f',
+ 'info_dict': {
+ 'id': '4991738',
+ 'display_id': 'birds-original-mix',
+ 'ext': 'mp4',
+ 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
+ }
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ track_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
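+        # The track page embeds a window.Playables JSON object holding
+        # metadata and preview stream URLs for every track on the page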
+ playables = self._parse_json(
+ self._search_regex(
+ r'window\.Playables\s*=\s*({.+?});', webpage,
+ 'playables info', flags=re.DOTALL),
+ track_id)
+
+ track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
+
+        title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name']
+ if track['mix']:
+ title += ' (' + track['mix'] + ')'
+
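+        # Previews come as mp3 and mp4 (AAC) variants; prefer the mp4 one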
+ formats = []
+ for ext, info in track['preview'].items():
+ if not info['url']:
+ continue
+ fmt = {
+ 'url': info['url'],
+ 'ext': ext,
+ 'format_id': ext,
+ 'vcodec': 'none',
+ }
+ if ext == 'mp3':
+ fmt['preference'] = 0
+ fmt['acodec'] = 'mp3'
+ fmt['abr'] = 96
+ fmt['asr'] = 44100
+ elif ext == 'mp4':
+ fmt['preference'] = 1
+ fmt['acodec'] = 'aac'
+ fmt['abr'] = 96
+ fmt['asr'] = 44100
+ formats.append(fmt)
+ self._sort_formats(formats)
+
+ images = []
+ for name, info in track['images'].items():
+ image_url = info.get('url')
+ if name == 'dynamic' or not image_url:
+ continue
+ image = {
+ 'id': name,
+ 'url': image_url,
+ 'height': int_or_none(info.get('height')),
+ 'width': int_or_none(info.get('width')),
+ }
+ images.append(image)
+
+ return {
+            'id': compat_str(track.get('id') or track_id),
+ 'display_id': track.get('slug') or display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': images,
+ }
diff --git a/hypervideo_dl/extractor/beeg.py b/hypervideo_dl/extractor/beeg.py
new file mode 100644
index 0000000..5788d13
--- /dev/null
+++ b/hypervideo_dl/extractor/beeg.py
@@ -0,0 +1,116 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ int_or_none,
+ unified_timestamp,
+)
+
+
+class BeegIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'
+ _TESTS = [{
+ # api/v6 v1
+ 'url': 'http://beeg.com/5416503',
+ 'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
+ 'info_dict': {
+ 'id': '5416503',
+ 'ext': 'mp4',
+ 'title': 'Sultry Striptease',
+ 'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2',
+ 'timestamp': 1391813355,
+ 'upload_date': '20140207',
+ 'duration': 383,
+ 'tags': list,
+ 'age_limit': 18,
+ }
+ }, {
+ # api/v6 v2
+ 'url': 'https://beeg.com/1941093077?t=911-1391',
+ 'only_matching': True,
+ }, {
+ # api/v6 v2 w/o t
+ 'url': 'https://beeg.com/1277207756',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://beeg.porn/video/5416503',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://beeg.porn/5416503',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ beeg_version = self._search_regex(
+ r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
+ default='1546225636701')
+
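+        # Long (10+ digit) IDs belong to the newer v2 API, which accepts an
+        # optional t=start-end clip range from the URL query string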
+ if len(video_id) >= 10:
+ query = {
+ 'v': 2,
+ }
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ t = qs.get('t', [''])[0].split('-')
+ if len(t) > 1:
+ query.update({
+ 's': t[0],
+ 'e': t[1],
+ })
+ else:
+ query = {'v': 1}
+
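+        # Try the bare domain first, then fall back to the api. subdomain;
+        # only the latter attempt is fatal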
+ for api_path in ('', 'api.'):
+ video = self._download_json(
+ 'https://%sbeeg.com/api/v6/%s/video/%s'
+ % (api_path, beeg_version, video_id), video_id,
+ fatal=api_path == 'api.', query=query)
+ if video:
+ break
+
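+        # Format keys look like '720p'; each URL carries a {DATA_MARKERS}
+        # placeholder that must be filled in with the beeg version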
+ formats = []
+ for format_id, video_url in video.items():
+ if not video_url:
+ continue
+ height = self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None)
+ if not height:
+ continue
+ formats.append({
+ 'url': self._proto_relative_url(
+ video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
+ 'format_id': format_id,
+ 'height': int(height),
+ })
+ self._sort_formats(formats)
+
+ title = video['title']
+ video_id = compat_str(video.get('id') or video_id)
+ display_id = video.get('code')
+ description = video.get('desc')
+ series = video.get('ps_name')
+
+ timestamp = unified_timestamp(video.get('date'))
+ duration = int_or_none(video.get('duration'))
+
+ tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'series': series,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'tags': tags,
+ 'formats': formats,
+ 'age_limit': self._rta_search(webpage),
+ }
diff --git a/hypervideo_dl/extractor/behindkink.py b/hypervideo_dl/extractor/behindkink.py
new file mode 100644
index 0000000..9bca853
--- /dev/null
+++ b/hypervideo_dl/extractor/behindkink.py
@@ -0,0 +1,46 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import url_basename
+
+
+class BehindKinkIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
+ _TEST = {
+ 'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
+ 'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
+ 'info_dict': {
+ 'id': '37127',
+ 'ext': 'mp4',
+ 'title': 'What are you passionate about – Marley Blaze',
+ 'description': 'md5:aee8e9611b4ff70186f752975d9b94b4',
+ 'upload_date': '20141205',
+ 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_url = self._search_regex(
+ r'<source src="([^"]+)"', webpage, 'video URL')
+ video_id = url_basename(video_url).split('_')[0]
+ upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': self._og_search_title(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage),
+ 'upload_date': upload_date,
+ 'age_limit': 18,
+ }
diff --git a/hypervideo_dl/extractor/bellmedia.py b/hypervideo_dl/extractor/bellmedia.py
new file mode 100644
index 0000000..9f9de96
--- /dev/null
+++ b/hypervideo_dl/extractor/bellmedia.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class BellMediaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://(?:www\.)?
+ (?P<domain>
+ (?:
+ ctv|
+ tsn|
+ bnn(?:bloomberg)?|
+ thecomedynetwork|
+ discovery|
+ discoveryvelocity|
+ sciencechannel|
+ investigationdiscovery|
+ animalplanet|
+ bravo|
+ mtv|
+ space|
+ etalk|
+ marilyn
+ )\.ca|
+ (?:much|cp24)\.com
+ )/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
+ _TESTS = [{
+ 'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
+ 'md5': '36d3ef559cfe8af8efe15922cd3ce950',
+ 'info_dict': {
+ 'id': '1403070',
+ 'ext': 'flv',
+ 'title': 'David Cockfield\'s Top Picks',
+ 'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
+ 'upload_date': '20180525',
+ 'timestamp': 1527288600,
+ },
+ }, {
+ 'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.etalk.ca/video?videoid=663455',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cp24.com/video?clipId=1982548',
+ 'only_matching': True,
+ }]
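+    # Map website domains to their 9c9media brand codes; domains not listed
+    # here are passed through unchanged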
+ _DOMAINS = {
+ 'thecomedynetwork': 'comedy',
+ 'discoveryvelocity': 'discvel',
+ 'sciencechannel': 'discsci',
+ 'investigationdiscovery': 'invdisc',
+ 'animalplanet': 'aniplan',
+ 'etalk': 'ctv',
+ 'bnnbloomberg': 'bnn',
+ 'marilyn': 'ctv_marilyn',
+ }
+
+ def _real_extract(self, url):
+ domain, video_id = re.match(self._VALID_URL, url).groups()
+ domain = domain.split('.')[0]
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id),
+ 'ie_key': 'NineCNineMedia',
+ }
diff --git a/hypervideo_dl/extractor/bet.py b/hypervideo_dl/extractor/bet.py
new file mode 100644
index 0000000..d7ceaa8
--- /dev/null
+++ b/hypervideo_dl/extractor/bet.py
@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+from .mtv import MTVServicesInfoExtractor
+from ..utils import unified_strdate
+
+
+class BetIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
+ _TESTS = [
+ {
+ 'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
+ 'info_dict': {
+ 'id': '07e96bd3-8850-3051-b856-271b457f0ab8',
+ 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
+ 'ext': 'flv',
+ 'title': 'A Conversation With President Obama',
+ 'description': 'President Obama urges persistence in confronting racism and bias.',
+ 'duration': 1534,
+ 'upload_date': '20141208',
+ 'thumbnail': r're:(?i)^https?://.*\.jpg$',
+ 'subtitles': {
+ 'en': 'mincount:2',
+ }
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
+ 'info_dict': {
+ 'id': '9f516bf1-7543-39c4-8076-dd441b459ba9',
+ 'display_id': 'justice-for-ferguson-a-community-reacts',
+ 'ext': 'flv',
+ 'title': 'Justice for Ferguson: A Community Reacts',
+ 'description': 'A BET News special.',
+ 'duration': 1696,
+ 'upload_date': '20141125',
+ 'thumbnail': r're:(?i)^https?://.*\.jpg$',
+ 'subtitles': {
+ 'en': 'mincount:2',
+ }
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }
+ ]
+
+ _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
+
+ def _get_feed_query(self, uri):
+ return {
+ 'uuid': uri,
+ }
+
+ def _extract_mgid(self, webpage):
+ return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+ mgid = self._extract_mgid(webpage)
+ videos_info = self._get_videos_info(mgid)
+
+ info_dict = videos_info['entries'][0]
+
+ upload_date = unified_strdate(self._html_search_meta('date', webpage))
+ description = self._html_search_meta('description', webpage)
+
+ info_dict.update({
+ 'display_id': display_id,
+ 'description': description,
+ 'upload_date': upload_date,
+ })
+
+ return info_dict
diff --git a/hypervideo_dl/extractor/bfi.py b/hypervideo_dl/extractor/bfi.py
new file mode 100644
index 0000000..60c8944
--- /dev/null
+++ b/hypervideo_dl/extractor/bfi.py
@@ -0,0 +1,37 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import extract_attributes
+
+
+class BFIPlayerIE(InfoExtractor):
+ IE_NAME = 'bfi:player'
+ _VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P<id>[\w-]+)-online'
+ _TEST = {
+ 'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online',
+ 'md5': 'e8783ebd8e061ec4bc6e9501ed547de8',
+ 'info_dict': {
+ 'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63',
+ 'ext': 'mp4',
+ 'title': 'Computer Doctor',
+ 'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b',
+ },
+ 'skip': 'BFI Player films cannot be played outside of the UK',
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ entries = []
+ for player_el in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage):
+ player_attr = extract_attributes(player_el)
+ ooyala_id = player_attr.get('data-video-id')
+ if not ooyala_id:
+ continue
+ entries.append(self.url_result(
+ 'ooyala:' + ooyala_id, 'Ooyala',
+ ooyala_id, player_attr.get('data-label')))
+ return self.playlist_result(entries)
diff --git a/hypervideo_dl/extractor/bfmtv.py b/hypervideo_dl/extractor/bfmtv.py
new file mode 100644
index 0000000..501f69d
--- /dev/null
+++ b/hypervideo_dl/extractor/bfmtv.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import extract_attributes
+
+
+class BFMTVBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?bfmtv\.com/'
+ _VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
+ _VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block"[^>]*>)'
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
+
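+    # Fall back to BFMTV's default Brightcove account and player ids when
+    # the video block does not specify them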
+ def _brightcove_url_result(self, video_id, video_block):
+ account_id = video_block.get('accountid') or '876450612001'
+ player_id = video_block.get('playerid') or 'I2qBTln4u'
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
+ 'BrightcoveNew', video_id)
+
+
+class BFMTVIE(BFMTVBaseIE):
+ IE_NAME = 'bfmtv'
+ _VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'V'
+ _TESTS = [{
+ 'url': 'https://www.bfmtv.com/politique/emmanuel-macron-l-islam-est-une-religion-qui-vit-une-crise-aujourd-hui-partout-dans-le-monde_VN-202010020146.html',
+ 'info_dict': {
+ 'id': '6196747868001',
+ 'ext': 'mp4',
+ 'title': 'Emmanuel Macron: "L\'Islam est une religion qui vit une crise aujourd’hui, partout dans le monde"',
+ 'description': 'Le Président s\'exprime sur la question du séparatisme depuis les Mureaux, dans les Yvelines.',
+ 'uploader_id': '876450610001',
+ 'upload_date': '20201002',
+ 'timestamp': 1601629620,
+ },
+ }]
+
+ def _real_extract(self, url):
+ bfmtv_id = self._match_id(url)
+ webpage = self._download_webpage(url, bfmtv_id)
+ video_block = extract_attributes(self._search_regex(
+ self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
+ return self._brightcove_url_result(video_block['videoid'], video_block)
+
+
+class BFMTVLiveIE(BFMTVIE):
+ IE_NAME = 'bfmtv:live'
+ _VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
+ _TESTS = [{
+ 'url': 'https://www.bfmtv.com/en-direct/',
+ 'info_dict': {
+ 'id': '5615950982001',
+ 'ext': 'mp4',
+ 'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'uploader_id': '876450610001',
+ 'upload_date': '20171018',
+ 'timestamp': 1508329950,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.bfmtv.com/economie/en-direct/',
+ 'only_matching': True,
+ }]
+
+
+class BFMTVArticleIE(BFMTVBaseIE):
+ IE_NAME = 'bfmtv:article'
+ _VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'A'
+ _TESTS = [{
+ 'url': 'https://www.bfmtv.com/sante/covid-19-un-responsable-de-l-institut-pasteur-se-demande-quand-la-france-va-se-reconfiner_AV-202101060198.html',
+ 'info_dict': {
+ 'id': '202101060198',
+ 'title': 'Covid-19: un responsable de l\'Institut Pasteur se demande "quand la France va se reconfiner"',
+ 'description': 'md5:947974089c303d3ac6196670ae262843',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'https://www.bfmtv.com/international/pour-bolsonaro-le-bresil-est-en-faillite-mais-il-ne-peut-rien-faire_AD-202101060232.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bfmtv.com/sante/covid-19-oui-le-vaccin-de-pfizer-distribue-en-france-a-bien-ete-teste-sur-des-personnes-agees_AN-202101060275.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ bfmtv_id = self._match_id(url)
+ webpage = self._download_webpage(url, bfmtv_id)
+
+ entries = []
+ for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
+ video_block = extract_attributes(video_block_el)
+ video_id = video_block.get('videoid')
+ if not video_id:
+ continue
+ entries.append(self._brightcove_url_result(video_id, video_block))
+
+ return self.playlist_result(
+ entries, bfmtv_id, self._og_search_title(webpage, fatal=False),
+ self._html_search_meta(['og:description', 'description'], webpage))
diff --git a/hypervideo_dl/extractor/bibeltv.py b/hypervideo_dl/extractor/bibeltv.py
new file mode 100644
index 0000000..56c2bfe
--- /dev/null
+++ b/hypervideo_dl/extractor/bibeltv.py
@@ -0,0 +1,30 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class BibelTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch',
+ 'md5': '252f908192d611de038b8504b08bf97f',
+ 'info_dict': {
+ 'id': 'ref:329703',
+ 'ext': 'mp4',
+ 'title': 'Sprachkurs in Malaiisch',
+ 'description': 'md5:3e9f197d29ee164714e67351cf737dfe',
+ 'timestamp': 1608316701,
+ 'uploader_id': '5840105145001',
+ 'upload_date': '20201218',
+ }
+ }, {
+ 'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s'
+
+ def _real_extract(self, url):
+ crn_id = self._match_id(url)
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % crn_id, 'BrightcoveNew')
diff --git a/hypervideo_dl/extractor/bigflix.py b/hypervideo_dl/extractor/bigflix.py
new file mode 100644
index 0000000..28e3e59
--- /dev/null
+++ b/hypervideo_dl/extractor/bigflix.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote,
+)
+
+
+class BigflixIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
+ _TESTS = [{
+ # 2 formats
+ 'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
+ 'info_dict': {
+ 'id': '16070',
+ 'ext': 'mp4',
+ 'title': 'Madarasapatinam',
+ 'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
+ 'formats': 'mincount:2',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # multiple formats
+ 'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
+ webpage, 'title')
+
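+        # Stream URLs are embedded URL-quoted and base64-encoded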
+ def decode_url(quoted_b64_url):
+ return compat_b64decode(compat_urllib_parse_unquote(
+ quoted_b64_url)).decode('utf-8')
+
+ formats = []
+ for height, encoded_url in re.findall(
+ r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage):
+ video_url = decode_url(encoded_url)
+ f = {
+ 'url': video_url,
+ 'format_id': '%sp' % height,
+ 'height': int(height),
+ }
+ if video_url.startswith('rtmp'):
+ f['ext'] = 'flv'
+ formats.append(f)
+
+ file_url = self._search_regex(
+ r'file=([^&]+)', webpage, 'video url', default=None)
+ if file_url:
+ video_url = decode_url(file_url)
+ if all(f['url'] != video_url for f in formats):
+ formats.append({
+                    'url': video_url,
+ })
+
+ self._sort_formats(formats)
+
+ description = self._html_search_meta('description', webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats
+ }
diff --git a/hypervideo_dl/extractor/bild.py b/hypervideo_dl/extractor/bild.py
new file mode 100644
index 0000000..b8dfbd4
--- /dev/null
+++ b/hypervideo_dl/extractor/bild.py
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unescapeHTML,
+)
+
+
+class BildIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
+ IE_DESC = 'Bild.de'
+ _TEST = {
+ 'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
+ 'md5': 'dd495cbd99f2413502a1713a1156ac8a',
+ 'info_dict': {
+ 'id': '38184146',
+ 'ext': 'mp4',
+ 'title': 'Das können die neuen iPads',
+ 'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 196,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video_data = self._download_json(
+ url.split('.bild.html')[0] + ',view=json.bild.html', video_id)
+
+ return {
+ 'id': video_id,
+ 'title': unescapeHTML(video_data['title']).strip(),
+ 'description': unescapeHTML(video_data.get('description')),
+ 'url': video_data['clipList'][0]['srces'][0]['src'],
+ 'thumbnail': video_data.get('poster'),
+ 'duration': int_or_none(video_data.get('durationSec')),
+ }
diff --git a/hypervideo_dl/extractor/bilibili.py b/hypervideo_dl/extractor/bilibili.py
new file mode 100644
index 0000000..08e12cc
--- /dev/null
+++ b/hypervideo_dl/extractor/bilibili.py
@@ -0,0 +1,451 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ parse_iso8601,
+ smuggle_url,
+ str_or_none,
+ strip_jsonp,
+ unified_timestamp,
+ unsmuggle_url,
+ urlencode_postdata,
+)
+
+
+class BiliBiliIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|bangumi)\.)?
+ bilibili\.(?:tv|com)/
+ (?:
+ (?:
+ video/[aA][vV]|
+ anime/(?P<anime_id>\d+)/play\#
+                        )(?P<id>\d+)|
+                        video/[bB][vV](?P<id_bv>[^/?#&]+)
+ )
+ '''
+
+ _TESTS = [{
+ 'url': 'http://www.bilibili.tv/video/av1074402/',
+ 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
+ 'info_dict': {
+ 'id': '1074402',
+ 'ext': 'flv',
+ 'title': '【金坷垃】金泡沫',
+ 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
+ 'duration': 308.067,
+ 'timestamp': 1398012678,
+ 'upload_date': '20140420',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'uploader': '菊子桑',
+ 'uploader_id': '156160',
+ },
+ }, {
+ # Tested in BiliBiliBangumiIE
+ 'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
+ 'md5': '3f721ad1e75030cc06faf73587cfec57',
+ 'info_dict': {
+ 'id': '100643',
+ 'ext': 'mp4',
+ 'title': 'CHAOS;CHILD',
+ 'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
+ },
+ 'skip': 'Geo-restricted to China',
+ }, {
+ # Title with double quotes
+ 'url': 'http://www.bilibili.com/video/av8903802/',
+ 'info_dict': {
+ 'id': '8903802',
+ 'title': '阿滴英文|英文歌分享#6 "Closer',
+ 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '8903802_part1',
+ 'ext': 'flv',
+ 'title': '阿滴英文|英文歌分享#6 "Closer',
+ 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
+ 'uploader': '阿滴英文',
+ 'uploader_id': '65880958',
+ 'timestamp': 1488382634,
+ 'upload_date': '20170301',
+ },
+ 'params': {
+ 'skip_download': True, # Test metadata only
+ },
+ }, {
+ 'info_dict': {
+ 'id': '8903802_part2',
+ 'ext': 'flv',
+ 'title': '阿滴英文|英文歌分享#6 "Closer',
+ 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
+ 'uploader': '阿滴英文',
+ 'uploader_id': '65880958',
+ 'timestamp': 1488382634,
+ 'upload_date': '20170301',
+ },
+ 'params': {
+ 'skip_download': True, # Test metadata only
+ },
+ }]
+ }, {
+ # new BV video id format
+ 'url': 'https://www.bilibili.com/video/BV1JE411F741',
+ 'only_matching': True,
+ }]
+
+ _APP_KEY = 'iVGUTjsxvpLeuDCf'
+ _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
+
+ def _report_error(self, result):
+ if 'message' in result:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
+ elif 'code' in result:
+ raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
+ else:
+ raise ExtractorError('Can\'t extract Bangumi episode ID')
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('id_bv')
+ anime_id = mobj.group('anime_id')
+ webpage = self._download_webpage(url, video_id)
+
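+        # cid is bilibili's internal stream id: scrape it from the page for
+        # regular videos, or ask the bangumi API for anime episodes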
+ if 'anime/' not in url:
+ cid = self._search_regex(
+ r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
+ default=None
+ ) or compat_parse_qs(self._search_regex(
+ [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
+ r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
+ r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
+ webpage, 'player parameters'))['cid'][0]
+ else:
+ if 'no_bangumi_tip' not in smuggled_data:
+ self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run hypervideo with %s' % (
+ video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
+ headers = {
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'Referer': url
+ }
+ headers.update(self.geo_verification_headers())
+
+ js = self._download_json(
+ 'http://bangumi.bilibili.com/web_api/get_source', video_id,
+ data=urlencode_postdata({'episode_id': video_id}),
+ headers=headers)
+ if 'result' not in js:
+ self._report_error(js)
+ cid = js['result']['cid']
+
+ headers = {
+ 'Accept': 'application/json',
+ 'Referer': url
+ }
+ headers.update(self.geo_verification_headers())
+
+ entries = []
+
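+        # Try the high-quality FLV rendition first, then fall back to MP4;
+        # every playurl request is signed with md5(payload + app secret)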
+ RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
+ for num, rendition in enumerate(RENDITIONS, start=1):
+ payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
+ sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
+
+ video_info = self._download_json(
+ 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
+ video_id, note='Downloading video info page',
+ headers=headers, fatal=num == len(RENDITIONS))
+
+ if not video_info:
+ continue
+
+ if 'durl' not in video_info:
+ if num < len(RENDITIONS):
+ continue
+ self._report_error(video_info)
+
+ for idx, durl in enumerate(video_info['durl']):
+ formats = [{
+ 'url': durl['url'],
+ 'filesize': int_or_none(durl['size']),
+ }]
+ for backup_url in durl.get('backup_url', []):
+ formats.append({
+ 'url': backup_url,
+                        # backup URLs have lower priority
+ 'preference': -2 if 'hd.mp4' in backup_url else -3,
+ })
+
+ for a_format in formats:
+ a_format.setdefault('http_headers', {}).update({
+ 'Referer': url,
+ })
+
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': '%s_part%s' % (video_id, idx),
+ 'duration': float_or_none(durl.get('length'), 1000),
+ 'formats': formats,
+ })
+ break
+
+ title = self._html_search_regex(
+            (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
+             r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
+ group='title')
+ description = self._html_search_meta('description', webpage)
+ timestamp = unified_timestamp(self._html_search_regex(
+ r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
+ default=None) or self._html_search_meta(
+ 'uploadDate', webpage, 'timestamp', default=None))
+ thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
+
+        # TODO: 'view_count' requires deobfuscating JavaScript
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'thumbnail': thumbnail,
+ 'duration': float_or_none(video_info.get('timelength'), scale=1000),
+ }
+
+ uploader_mobj = re.search(
+ r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
+ webpage)
+ if uploader_mobj:
+ info.update({
+ 'uploader': uploader_mobj.group('name'),
+ 'uploader_id': uploader_mobj.group('id'),
+ })
+ if not info.get('uploader'):
+ info['uploader'] = self._html_search_meta(
+ 'author', webpage, 'uploader', default=None)
+
+ for entry in entries:
+ entry.update(info)
+
+ if len(entries) == 1:
+ return entries[0]
+ else:
+ for idx, entry in enumerate(entries):
+ entry['id'] = '%s_part%d' % (video_id, (idx + 1))
+
+ return {
+ '_type': 'multi_video',
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'entries': entries,
+ }
+
+
+class BiliBiliBangumiIE(InfoExtractor):
+ _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'
+
+ IE_NAME = 'bangumi.bilibili.com'
+ IE_DESC = 'BiliBili番剧'
+
+ _TESTS = [{
+ 'url': 'http://bangumi.bilibili.com/anime/1869',
+ 'info_dict': {
+ 'id': '1869',
+ 'title': '混沌武士',
+ 'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
+ },
+ 'playlist_count': 26,
+ }, {
+ 'url': 'http://bangumi.bilibili.com/anime/1869',
+ 'info_dict': {
+ 'id': '1869',
+ 'title': '混沌武士',
+ 'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
+ },
+ 'playlist': [{
+ 'md5': '91da8621454dd58316851c27c68b0c13',
+ 'info_dict': {
+ 'id': '40062',
+ 'ext': 'mp4',
+ 'title': '混沌武士',
+ 'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
+ 'timestamp': 1414538739,
+ 'upload_date': '20141028',
+ 'episode': '疾风怒涛 Tempestuous Temperaments',
+ 'episode_number': 1,
+ },
+ }],
+ 'params': {
+ 'playlist_items': '1',
+ },
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ bangumi_id = self._match_id(url)
+
+ # Sometimes this API returns a JSONP response
+ season_info = self._download_json(
+ 'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
+ bangumi_id, transform_source=strip_jsonp)['result']
+
+ entries = [{
+ '_type': 'url_transparent',
+ 'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
+ 'ie_key': BiliBiliIE.ie_key(),
+ 'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
+ 'episode': episode.get('index_title'),
+ 'episode_number': int_or_none(episode.get('index')),
+ } for episode in season_info['episodes']]
+
+        entries = sorted(entries, key=lambda entry: entry.get('episode_number') or 0)
+
+ return self.playlist_result(
+ entries, bangumi_id,
+ season_info.get('bangumi_title'), season_info.get('evaluate'))
+
+
+class BilibiliAudioBaseIE(InfoExtractor):
+ def _call_api(self, path, sid, query=None):
+ if not query:
+ query = {'sid': sid}
+ return self._download_json(
+ 'https://www.bilibili.com/audio/music-service-c/web/' + path,
+ sid, query=query)['data']
+
+
+class BilibiliAudioIE(BilibiliAudioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.bilibili.com/audio/au1003142',
+ 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
+ 'info_dict': {
+ 'id': '1003142',
+ 'ext': 'm4a',
+ 'title': '【tsukimi】YELLOW / 神山羊',
+ 'artist': 'tsukimi',
+ 'comment_count': int,
+ 'description': 'YELLOW的mp3版!',
+ 'duration': 183,
+ 'subtitles': {
+ 'origin': [{
+ 'ext': 'lrc',
+ }],
+ },
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'timestamp': 1564836614,
+ 'upload_date': '20190803',
+ 'uploader': 'tsukimi-つきみぐー',
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ au_id = self._match_id(url)
+
+ play_data = self._call_api('url', au_id)
+ formats = [{
+ 'url': play_data['cdns'][0],
+ 'filesize': int_or_none(play_data.get('size')),
+ }]
+
+ song = self._call_api('song/info', au_id)
+ title = song['title']
+ statistic = song.get('statistic') or {}
+
+ subtitles = None
+ lyric = song.get('lyric')
+ if lyric:
+ subtitles = {
+ 'origin': [{
+ 'url': lyric,
+ }]
+ }
+
+ return {
+ 'id': au_id,
+ 'title': title,
+ 'formats': formats,
+ 'artist': song.get('author'),
+ 'comment_count': int_or_none(statistic.get('comment')),
+ 'description': song.get('intro'),
+ 'duration': int_or_none(song.get('duration')),
+ 'subtitles': subtitles,
+ 'thumbnail': song.get('cover'),
+ 'timestamp': int_or_none(song.get('passtime')),
+ 'uploader': song.get('uname'),
+ 'view_count': int_or_none(statistic.get('play')),
+ }
+
+
+class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.bilibili.com/audio/am10624',
+ 'info_dict': {
+ 'id': '10624',
+ 'title': '每日新曲推荐(每日11:00更新)',
+ 'description': '每天11:00更新,为你推送最新音乐',
+ },
+ 'playlist_count': 19,
+ }
+
+ def _real_extract(self, url):
+ am_id = self._match_id(url)
+
+ songs = self._call_api(
+ 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
+
+ entries = []
+ for song in songs:
+ sid = str_or_none(song.get('id'))
+ if not sid:
+ continue
+ entries.append(self.url_result(
+ 'https://www.bilibili.com/audio/au' + sid,
+ BilibiliAudioIE.ie_key(), sid))
+
+ if entries:
+ album_data = self._call_api('menu/info', am_id) or {}
+ album_title = album_data.get('title')
+ if album_title:
+ for entry in entries:
+ entry['album'] = album_title
+ return self.playlist_result(
+ entries, am_id, album_title, album_data.get('intro'))
+
+ return self.playlist_result(entries, am_id)
+
+
+class BiliBiliPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'http://www.bilibili.tv/video/av%s/' % video_id,
+ ie=BiliBiliIE.ie_key(), video_id=video_id)
diff --git a/hypervideo_dl/extractor/biobiochiletv.py b/hypervideo_dl/extractor/biobiochiletv.py
new file mode 100644
index 0000000..dc86c57
--- /dev/null
+++ b/hypervideo_dl/extractor/biobiochiletv.py
@@ -0,0 +1,86 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ remove_end,
+)
+
+
+class BioBioChileTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
+
+ _TESTS = [{
+ 'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
+ 'md5': '26f51f03cf580265defefb4518faec09',
+ 'info_dict': {
+ 'id': 'sobre-camaras-y-camarillas-parlamentarias',
+ 'ext': 'mp4',
+ 'title': 'Sobre Cámaras y camarillas parlamentarias',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Fernando Atria',
+ },
+ 'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
+ }, {
+ # different uploader layout
+ 'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
+ 'md5': 'edc2e6b58974c46d5b047dea3c539ff3',
+ 'info_dict': {
+ 'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades',
+ 'ext': 'mp4',
+ 'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Piangella Obrador',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
+ }, {
+ 'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
+ 'info_dict': {
+ 'id': 'b4xd0LK3SK',
+ 'ext': 'mp4',
+ # TODO: fix url_transparent information overriding
+ # 'uploader': 'Juan Pablo Echenique',
+ 'title': 'Comentario Oscar Cáceres',
+ },
+ 'params': {
+ # empty m3u8 manifest
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ rudo_url = self._search_regex(
+ r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
+ webpage, 'embed URL', None, group='url')
+ if not rudo_url:
+ raise ExtractorError('No videos found')
+
+ title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
+
+ thumbnail = self._og_search_thumbnail(webpage)
+ uploader = self._html_search_regex(
+ r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
+ webpage, 'uploader', fatal=False)
+
+ return {
+ '_type': 'url_transparent',
+ 'url': rudo_url,
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ }
diff --git a/hypervideo_dl/extractor/biqle.py b/hypervideo_dl/extractor/biqle.py
new file mode 100644
index 0000000..17ebbb2
--- /dev/null
+++ b/hypervideo_dl/extractor/biqle.py
@@ -0,0 +1,105 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .vk import VKIE
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote,
+)
+from ..utils import int_or_none
+
+
+class BIQLEIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
+ _TESTS = [{
+ # Youtube embed
+ 'url': 'https://biqle.ru/watch/-115995369_456239081',
+ 'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
+ 'info_dict': {
+ 'id': '8v4f-avW-VI',
+ 'ext': 'mp4',
+ 'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
+ 'description': 'Passe-Partout',
+ 'uploader_id': 'mrsimpsonstef3',
+ 'uploader': 'Phanolito',
+ 'upload_date': '20120822',
+ },
+ }, {
+ 'url': 'http://biqle.org/watch/-44781847_168547604',
+ 'md5': '7f24e72af1db0edf7c1aaba513174f97',
+ 'info_dict': {
+ 'id': '-44781847_168547604',
+ 'ext': 'mp4',
+ 'title': 'Ребенок в шоке от автоматической мойки',
+ 'timestamp': 1396633454,
+ 'uploader': 'Dmitry Kotov',
+ 'upload_date': '20140404',
+ 'uploader_id': '47850140',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ embed_url = self._proto_relative_url(self._search_regex(
+ r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
+ webpage, 'embed url'))
+ if VKIE.suitable(embed_url):
+ return self.url_result(embed_url, VKIE.ie_key(), video_id)
+
+ embed_page = self._download_webpage(
+ embed_url, video_id, headers={'Referer': url})
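+        # The embed exposes a colon-separated token (video id, signature,
+        # unused field, access token), either URL-quoted in the video_ext
+        # cookie or base64-encoded in the page, for the VK video.get API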
+ video_ext = self._get_cookies(embed_url).get('video_ext')
+ if video_ext:
+ video_ext = compat_urllib_parse_unquote(video_ext.value)
+ if not video_ext:
+ video_ext = compat_b64decode(self._search_regex(
+ r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
+ embed_page, 'video_ext')).decode()
+ video_id, sig, _, access_token = video_ext.split(':')
+ item = self._download_json(
+ 'https://api.vk.com/method/video.get', video_id,
+ headers={'User-Agent': 'okhttp/3.4.1'}, query={
+ 'access_token': access_token,
+ 'sig': sig,
+ 'v': 5.44,
+ 'videos': video_id,
+ })['response']['items'][0]
+ title = item['title']
+
+ formats = []
+ for f_id, f_url in item.get('files', {}).items():
+ if f_id == 'external':
+ return self.url_result(f_url)
+ ext, height = f_id.split('_')
+ formats.append({
+ 'format_id': height + 'p',
+ 'url': f_url,
+ 'height': int_or_none(height),
+ 'ext': ext,
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for k, v in item.items():
+ if k.startswith('photo_') and v:
+ width = k.replace('photo_', '')
+ thumbnails.append({
+ 'id': width,
+ 'url': v,
+ 'width': int_or_none(width),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'comment_count': int_or_none(item.get('comments')),
+ 'description': item.get('description'),
+ 'duration': int_or_none(item.get('duration')),
+ 'thumbnails': thumbnails,
+ 'timestamp': int_or_none(item.get('date')),
+ 'uploader': item.get('owner_id'),
+ 'view_count': int_or_none(item.get('views')),
+ }
diff --git a/hypervideo_dl/extractor/bitchute.py b/hypervideo_dl/extractor/bitchute.py
new file mode 100644
index 0000000..0c773e6
--- /dev/null
+++ b/hypervideo_dl/extractor/bitchute.py
@@ -0,0 +1,142 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ orderedSet,
+ unified_strdate,
+ urlencode_postdata,
+)
+
+
+class BitChuteIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
+ 'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
+ 'info_dict': {
+ 'id': 'szoMrox2JEI',
+ 'ext': 'mp4',
+ 'title': 'Fuck bitches get money',
+ 'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Victoria X Rave',
+ 'upload_date': '20170813',
+ },
+ }, {
+ 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
+ })
+
+ title = self._html_search_regex(
+ (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
+ webpage, 'title', default=None) or self._html_search_meta(
+ 'description', webpage, 'title',
+ default=None) or self._og_search_description(webpage)
+
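+        # Collect direct stream URLs from the torrent webseeds and from the
+        # as= query-string fallback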
+ format_urls = []
+ for mobj in re.finditer(
+ r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
+ format_urls.append(mobj.group('url'))
+ format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
+
+ formats = [
+ {'url': format_url}
+ for format_url in orderedSet(format_urls)]
+
+ if not formats:
+ formats = self._parse_html5_media_entries(
+ url, webpage, video_id)[0]['formats']
+
+ self._check_formats(formats, video_id)
+ self._sort_formats(formats)
+
+ description = self._html_search_regex(
+ r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
+ webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:image:src', webpage, 'thumbnail')
+ uploader = self._html_search_regex(
+ (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
+ r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
+ webpage, 'uploader', fatal=False)
+
+ upload_date = unified_strdate(self._search_regex(
+ r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
+ webpage, 'upload date', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
+
+
+class BitChuteChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.bitchute.com/channel/victoriaxrave/',
+ 'playlist_mincount': 185,
+ 'info_dict': {
+ 'id': 'victoriaxrave',
+ },
+ }
+
+ _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
+
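+    # Channel pages are fetched from the /extend/ AJAX endpoint with a fixed
+    # CSRF token, advancing the offset until a page yields no video ids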
+ def _entries(self, channel_id):
+ channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
+ offset = 0
+ for page_num in itertools.count(1):
+ data = self._download_json(
+ '%sextend/' % channel_url, channel_id,
+ 'Downloading channel page %d' % page_num,
+ data=urlencode_postdata({
+ 'csrfmiddlewaretoken': self._TOKEN,
+ 'name': '',
+ 'offset': offset,
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'Referer': channel_url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Cookie': 'csrftoken=%s' % self._TOKEN,
+ })
+ if data.get('success') is False:
+ break
+ html = data.get('html')
+ if not html:
+ break
+ video_ids = re.findall(
+ r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
+ html)
+ if not video_ids:
+ break
+ offset += len(video_ids)
+ for video_id in video_ids:
+ yield self.url_result(
+ 'https://www.bitchute.com/video/%s' % video_id,
+ ie=BitChuteIE.ie_key(), video_id=video_id)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ return self.playlist_result(
+ self._entries(channel_id), playlist_id=channel_id)
diff --git a/hypervideo_dl/extractor/bleacherreport.py b/hypervideo_dl/extractor/bleacherreport.py
new file mode 100644
index 0000000..d1bf8e8
--- /dev/null
+++ b/hypervideo_dl/extractor/bleacherreport.py
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .amp import AMPIE
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class BleacherReportIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
+ 'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
+ 'info_dict': {
+ 'id': '2496438',
+ 'ext': 'mp4',
+ 'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
+ 'uploader_id': 3992341,
+ 'description': 'CFB, ACC, Florida State',
+ 'timestamp': 1434380212,
+ 'upload_date': '20150615',
+ 'uploader': 'Team Stream Now ',
+ },
+ 'add_ie': ['Ooyala'],
+ }, {
+ 'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
+ 'md5': '6a5cd403418c7b01719248ca97fb0692',
+ 'info_dict': {
+ 'id': '2586817',
+ 'ext': 'webm',
+ 'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
+ 'timestamp': 1446839961,
+ 'uploader': 'Sean Fay',
+ 'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',
+ 'uploader_id': 6466954,
+ 'upload_date': '20151011',
+ },
+ 'add_ie': ['Youtube'],
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+
+ article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
+
+ thumbnails = []
+ primary_photo = article_data.get('primaryPhoto')
+ if primary_photo:
+ thumbnails = [{
+ 'url': primary_photo['url'],
+ 'width': primary_photo.get('width'),
+ 'height': primary_photo.get('height'),
+ }]
+
+ info = {
+ '_type': 'url_transparent',
+ 'id': article_id,
+ 'title': article_data['title'],
+ 'uploader': article_data.get('author', {}).get('name'),
+ 'uploader_id': article_data.get('authorId'),
+ 'timestamp': parse_iso8601(article_data.get('createdAt')),
+ 'thumbnails': thumbnails,
+ 'comment_count': int_or_none(article_data.get('commentsCount')),
+ 'view_count': int_or_none(article_data.get('hitCount')),
+ }
+
+ video = article_data.get('video')
+ if video:
+ video_type = video['type']
+ if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
+ info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
+ elif video_type == 'ooyala.com':
+ info['url'] = 'ooyala:%s' % video['id']
+ elif video_type == 'youtube.com':
+ info['url'] = video['id']
+ elif video_type == 'vine.co':
+ info['url'] = 'https://vine.co/v/%s' % video['id']
+ else:
+ info['url'] = video_type + video['id']
+ return info
+ else:
+ raise ExtractorError('no video in the article', expected=True)
+
+
+class BleacherReportCMSIE(AMPIE):
+ _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
+ _TESTS = [{
+ 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
+ 'md5': '670b2d73f48549da032861130488c681',
+ 'info_dict': {
+ 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
+ 'ext': 'mp4',
+ 'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
+ 'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
+ 'upload_date': '20150723',
+ 'timestamp': 1437679032,
+ },
+ 'expected_warnings': [
+ 'Unable to download f4m manifest'
+ ]
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id)
+ info['id'] = video_id
+ return info
diff --git a/hypervideo_dl/extractor/bloomberg.py b/hypervideo_dl/extractor/bloomberg.py
new file mode 100644
index 0000000..2fbfad1
--- /dev/null
+++ b/hypervideo_dl/extractor/bloomberg.py
@@ -0,0 +1,83 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class BloombergIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
+ # The md5 checksum changes
+ 'info_dict': {
+ 'id': 'qurhIVlJSB6hzkVi229d8g',
+ 'ext': 'flv',
+ 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
+ 'description': 'md5:a8ba0302912d03d246979735c17d2761',
+ },
+ 'params': {
+ 'format': 'best[format_id^=hds]',
+ },
+ }, {
+ # video ID in BPlayer(...)
+ 'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/',
+ 'info_dict': {
+ 'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74',
+ 'ext': 'flv',
+ 'title': 'Meet the Real-Life Tech Wizards of Middle Earth',
+ 'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.',
+ },
+ 'params': {
+ 'format': 'best[format_id^=hds]',
+ },
+ }, {
+ # data-bmmrid=
+ 'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bloomberg.com/politics/videos/2015-11-25/karl-rove-on-jeb-bush-s-struggles-stopping-trump',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ name = self._match_id(url)
+ webpage = self._download_webpage(url, name)
+ video_id = self._search_regex(
+ (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+ r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+ r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'),
+ webpage, 'id', group='id', default=None)
+ if not video_id:
+ bplayer_data = self._parse_json(self._search_regex(
+ r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
+ video_id = bplayer_data['id']
+ title = re.sub(': Video$', '', self._og_search_title(webpage))
+
+ embed_info = self._download_json(
+ 'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
+ formats = []
+ for stream in embed_info['streams']:
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ if stream['muxing_format'] == 'TS':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ else:
+ formats.extend(self._extract_f4m_formats(
+ stream_url, video_id, f4m_id='hds', fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
diff --git a/hypervideo_dl/extractor/bokecc.py b/hypervideo_dl/extractor/bokecc.py
new file mode 100644
index 0000000..6017e83
--- /dev/null
+++ b/hypervideo_dl/extractor/bokecc.py
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import ExtractorError
+
+
+class BokeCCBaseIE(InfoExtractor):
+ def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
+ player_params_str = self._html_search_regex(
+ r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)',
+ webpage, 'player params', group='query')
+
+ player_params = compat_parse_qs(player_params_str)
+
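+ # the playinfo servlet, keyed by the siteid/vid pair taken from the
+ # embed URL, returns XML with one <quality> node (carrying a playurl)
+ # per available format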
+ info_xml = self._download_xml(
+ 'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
+ player_params['siteid'][0], player_params['vid'][0]), video_id)
+
+ formats = [{
+ 'format_id': format_id,
+ 'url': quality.find('./copy').attrib['playurl'],
+ 'preference': int(quality.attrib['value']),
+ } for quality in info_xml.findall('./video/quality')]
+
+ self._sort_formats(formats)
+
+ return formats
+
+
+class BokeCCIE(BokeCCBaseIE):
+ IE_DESC = 'CC视频'
+ _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
+
+ _TESTS = [{
+ 'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A',
+ 'info_dict': {
+ 'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461',
+ 'ext': 'flv',
+ 'title': 'BokeCC Video',
+ },
+ }]
+
+ def _real_extract(self, url):
+ qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
+ if not qs.get('vid') or not qs.get('uid'):
+ raise ExtractorError('Invalid URL', expected=True)
+
+ video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0])
+
+ webpage = self._download_webpage(url, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': 'BokeCC Video', # no title provided in the webpage
+ 'formats': self._extract_bokecc_formats(webpage, video_id),
+ }
diff --git a/hypervideo_dl/extractor/bongacams.py b/hypervideo_dl/extractor/bongacams.py
new file mode 100644
index 0000000..180542f
--- /dev/null
+++ b/hypervideo_dl/extractor/bongacams.py
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class BongaCamsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://de.bongacams.com/azumi-8',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://cn.bongacams.com/azumi-8',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ channel_id = mobj.group('id')
+
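+ # despite the amf.php name the endpoint answers JSON; getRoomData
+ # yields the video server URL and the performer's metadata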
+ amf = self._download_json(
+ 'https://%s/tools/amf.php' % host, channel_id,
+ data=urlencode_postdata((
+ ('method', 'getRoomData'),
+ ('args[]', channel_id),
+ ('args[]', 'false'),
+ )), headers={'X-Requested-With': 'XMLHttpRequest'})
+
+ server_url = amf['localData']['videoServerUrl']
+
+ uploader_id = try_get(
+ amf, lambda x: x['performerData']['username'], compat_str) or channel_id
+ uploader = try_get(
+ amf, lambda x: x['performerData']['displayName'], compat_str)
+ like_count = int_or_none(try_get(
+ amf, lambda x: x['performerData']['loversCount']))
+
+ formats = self._extract_m3u8_formats(
+ '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id),
+ channel_id, 'mp4', m3u8_id='hls', live=True)
+ self._sort_formats(formats)
+
+ return {
+ 'id': channel_id,
+ 'title': self._live_title(uploader or uploader_id),
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'like_count': like_count,
+ 'age_limit': 18,
+ 'is_live': True,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/bostonglobe.py b/hypervideo_dl/extractor/bostonglobe.py
new file mode 100644
index 0000000..57882fb
--- /dev/null
+++ b/hypervideo_dl/extractor/bostonglobe.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+from ..utils import (
+ extract_attributes,
+)
+
+
+class BostonGlobeIE(InfoExtractor):
+ _VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?'
+ _TESTS = [
+ {
+ 'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html',
+ 'md5': '0a62181079c85c2d2b618c9a738aedaf',
+ 'info_dict': {
+ 'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood',
+ 'id': '5320421710001',
+ 'ext': 'mp4',
+ 'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.',
+ 'timestamp': 1486877593,
+ 'upload_date': '20170212',
+ 'uploader_id': '245991542',
+ },
+ },
+ {
+ # Embedded youtube video; we hand it off to the Generic extractor.
+ 'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html',
+ 'md5': '582b40327089d5c0c949b3c54b13c24b',
+ 'info_dict': {
+ 'title': "Who Is Matt Damon's Favorite Batman?",
+ 'id': 'ZW1QCnlA6Qc',
+ 'ext': 'mp4',
+ 'upload_date': '20170217',
+ 'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb',
+ 'uploader': 'The Late Late Show with James Corden',
+ 'uploader_id': 'TheLateLateShow',
+ },
+ 'expected_warnings': ['404'],
+ },
+ ]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+
+ page_title = self._og_search_title(webpage, default=None)
+
+ # <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject">
+ entries = []
+ for video in re.findall(r'(?i)(<video[^>]+>)', webpage):
+ attrs = extract_attributes(video)
+
+ video_id = attrs.get('data-brightcove-video-id')
+ account_id = attrs.get('data-account')
+ player_id = attrs.get('data-player')
+ embed = attrs.get('data-embed')
+
+ if video_id and account_id and player_id and embed:
+ entries.append(
+ 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
+ % (account_id, player_id, embed, video_id))
+
+ if len(entries) == 0:
+ return self.url_result(url, 'Generic')
+ elif len(entries) == 1:
+ return self.url_result(entries[0], 'BrightcoveNew')
+ else:
+ return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew')
diff --git a/hypervideo_dl/extractor/box.py b/hypervideo_dl/extractor/box.py
new file mode 100644
index 0000000..aae82d1
--- /dev/null
+++ b/hypervideo_dl/extractor/box.py
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ parse_iso8601,
+ # try_get,
+ update_url_query,
+)
+
+
+class BoxIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
+ 'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
+ 'info_dict': {
+ 'id': '510727257538',
+ 'ext': 'mp4',
+ 'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4',
+ 'uploader': 'MLS Video',
+ 'timestamp': 1566320259,
+ 'upload_date': '20190820',
+ 'uploader_id': '235196876',
+ }
+ }
+
+ def _real_extract(self, url):
+ shared_name, file_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, file_id)
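+ # three-step access: scrape the requestToken from the shared-link
+ # page, trade it for a scoped read token, then query the Files API
+ # with the shared-link headers set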
+ request_token = self._parse_json(self._search_regex(
+ r'Box\.config\s*=\s*({.+?});', webpage,
+ 'Box config'), file_id)['requestToken']
+ access_token = self._download_json(
+ 'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
+ 'Downloading token JSON metadata',
+ data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'X-Request-Token': request_token,
+ 'X-Box-EndUser-API': 'sharedName=' + shared_name,
+ })[file_id]['read']
+ shared_link = 'https://app.box.com/s/' + shared_name
+ f = self._download_json(
+ 'https://api.box.com/2.0/files/' + file_id, file_id,
+ 'Downloading file JSON metadata', headers={
+ 'Authorization': 'Bearer ' + access_token,
+ 'BoxApi': 'shared_link=' + shared_link,
+ 'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats
+ }, query={
+ 'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
+ })
+ title = f['name']
+
+ query = {
+ 'access_token': access_token,
+ 'shared_link': shared_link
+ }
+
+ formats = []
+
+ # for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []):
+ # entry_url_template = try_get(
+ # entry, lambda x: x['content']['url_template'])
+ # if not entry_url_template:
+ # continue
+ # representation = entry.get('representation')
+ # if representation == 'dash':
+ # TODO: append query to every fragment URL
+ # formats.extend(self._extract_mpd_formats(
+ # entry_url_template.replace('{+asset_path}', 'manifest.mpd'),
+ # file_id, query=query))
+
+ authenticated_download_url = f.get('authenticated_download_url')
+ if authenticated_download_url and f.get('is_download_available'):
+ formats.append({
+ 'ext': f.get('extension') or determine_ext(title),
+ 'filesize': f.get('size'),
+ 'format_id': 'download',
+ 'url': update_url_query(authenticated_download_url, query),
+ })
+
+ self._sort_formats(formats)
+
+ creator = f.get('created_by') or {}
+
+ return {
+ 'id': file_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': f.get('description') or None,
+ 'uploader': creator.get('name'),
+ 'timestamp': parse_iso8601(f.get('created_at')),
+ 'uploader_id': creator.get('id'),
+ }
diff --git a/hypervideo_dl/extractor/bpb.py b/hypervideo_dl/extractor/bpb.py
new file mode 100644
index 0000000..0783353
--- /dev/null
+++ b/hypervideo_dl/extractor/bpb.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ determine_ext,
+)
+
+
+class BpbIE(InfoExtractor):
+ IE_DESC = 'Bundeszentrale für politische Bildung'
+ _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
+
+ _TEST = {
+ 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
+ # md5 check fails in Python 2.6 due to a buggy server response and urllib2's mishandling of it
+ 'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
+ 'info_dict': {
+ 'id': '297',
+ 'ext': 'mp4',
+ 'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
+ 'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h2 class="white">(.*?)</h2>', webpage, 'title')
+ video_info_dicts = re.findall(
+ r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
+
+ formats = []
+ for video_info in video_info_dicts:
+ video_info = self._parse_json(
+ video_info, video_id, transform_source=js_to_json, fatal=False)
+ if not video_info:
+ continue
+ video_url = video_info.get('src')
+ if not video_url:
+ continue
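+ # the only quality signal is a '_high' marker in the file URL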
+ quality = 'high' if '_high' in video_url else 'low'
+ formats.append({
+ 'url': video_url,
+ 'preference': 10 if quality == 'high' else 0,
+ 'format_note': quality,
+ 'format_id': '%s-%s' % (quality, determine_ext(video_url)),
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ }
diff --git a/hypervideo_dl/extractor/br.py b/hypervideo_dl/extractor/br.py
new file mode 100644
index 0000000..9bde7f2
--- /dev/null
+++ b/hypervideo_dl/extractor/br.py
@@ -0,0 +1,311 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ xpath_element,
+ xpath_text,
+)
+
+
+class BRIE(InfoExtractor):
+ IE_DESC = 'Bayerischer Rundfunk'
+ _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.br.de/mediathek/video/sendungen/abendschau/betriebliche-altersvorsorge-104.html',
+ 'md5': '83a0477cf0b8451027eb566d88b51106',
+ 'info_dict': {
+ 'id': '48f656ef-287e-486f-be86-459122db22cc',
+ 'ext': 'mp4',
+ 'title': 'Die böse Überraschung',
+ 'description': 'md5:ce9ac81b466ce775b8018f6801b48ac9',
+ 'duration': 180,
+ 'uploader': 'Reinhard Weber',
+ 'upload_date': '20150422',
+ },
+ 'skip': '404 not found',
+ },
+ {
+ 'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
+ 'md5': 'af3a3a4aa43ff0ce6a89504c67f427ef',
+ 'info_dict': {
+ 'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05',
+ 'ext': 'flv',
+ 'title': 'Manfred Schreiber ist tot',
+ 'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
+ 'duration': 26,
+ },
+ 'skip': '404 not found',
+ },
+ {
+ 'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
+ 'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d',
+ 'info_dict': {
+ 'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b',
+ 'ext': 'aac',
+ 'title': 'Kurzweilig und sehr bewegend',
+ 'description': 'md5:0351996e3283d64adeb38ede91fac54e',
+ 'duration': 296,
+ },
+ 'skip': '404 not found',
+ },
+ {
+ 'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
+ 'md5': 'dbab0aef2e047060ea7a21fc1ce1078a',
+ 'info_dict': {
+ 'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
+ 'ext': 'mp4',
+ 'title': 'Umweltbewusster Häuslebauer',
+ 'description': 'md5:d52dae9792d00226348c1dbb13c9bae2',
+ 'duration': 116,
+ }
+ },
+ {
+ 'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html',
+ 'md5': '23bca295f1650d698f94fc570977dae3',
+ 'info_dict': {
+ 'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
+ 'ext': 'mp4',
+ 'title': 'Folge 1 - Metaphysik',
+ 'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
+ 'duration': 893,
+ 'uploader': 'Eva Maria Steimle',
+ 'upload_date': '20170208',
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+ base_url, display_id = re.search(self._VALID_URL, url).groups()
+ page = self._download_webpage(url, display_id)
+ xml_url = self._search_regex(
+ r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
+ xml = self._download_xml(base_url + xml_url, display_id)
+
+ medias = []
+
+ for xml_media in xml.findall('video') + xml.findall('audio'):
+ media_id = xml_media.get('externalId')
+ media = {
+ 'id': media_id,
+ 'title': xpath_text(xml_media, 'title', 'title', True),
+ 'duration': parse_duration(xpath_text(xml_media, 'duration')),
+ 'formats': self._extract_formats(xpath_element(
+ xml_media, 'assets'), media_id),
+ 'thumbnails': self._extract_thumbnails(xpath_element(
+ xml_media, 'teaserImage/variants'), base_url),
+ 'description': xpath_text(xml_media, 'desc'),
+ 'webpage_url': xpath_text(xml_media, 'permalink'),
+ 'uploader': xpath_text(xml_media, 'author'),
+ }
+ broadcast_date = xpath_text(xml_media, 'broadcastDate')
+ if broadcast_date:
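+ # broadcastDate comes dotted day-first ('DD.MM.YYYY'); reversing
+ # the parts yields 'YYYYMMDD', e.g. '28.11.2017' -> '20171128'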
+ media['upload_date'] = ''.join(reversed(broadcast_date.split('.')))
+ medias.append(media)
+
+ if len(medias) > 1:
+ self._downloader.report_warning(
+ 'Found multiple media entries; please '
+ 'report this with the video URL to http://yt-dl.org/bug')
+ if not medias:
+ raise ExtractorError('No media entries found')
+ return medias[0]
+
+ def _extract_formats(self, assets, media_id):
+ formats = []
+ for asset in assets.findall('asset'):
+ format_url = xpath_text(asset, ['downloadUrl', 'url'])
+ asset_type = asset.get('type')
+ if asset_type.startswith('HDS'):
+ formats.extend(self._extract_f4m_formats(
+ format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False))
+ elif asset_type.startswith('HLS'):
+ formats.extend(self._extract_m3u8_formats(
+ format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ else:
+ format_info = {
+ 'ext': xpath_text(asset, 'mediaType'),
+ 'width': int_or_none(xpath_text(asset, 'frameWidth')),
+ 'height': int_or_none(xpath_text(asset, 'frameHeight')),
+ 'tbr': int_or_none(xpath_text(asset, 'bitrateVideo')),
+ 'abr': int_or_none(xpath_text(asset, 'bitrateAudio')),
+ 'vcodec': xpath_text(asset, 'codecVideo'),
+ 'acodec': xpath_text(asset, 'codecAudio'),
+ 'container': xpath_text(asset, 'mediaType'),
+ 'filesize': int_or_none(xpath_text(asset, 'size')),
+ }
+ format_url = self._proto_relative_url(format_url)
+ if format_url:
+ http_format_info = format_info.copy()
+ http_format_info.update({
+ 'url': format_url,
+ 'format_id': 'http-%s' % asset_type,
+ })
+ formats.append(http_format_info)
+ server_prefix = xpath_text(asset, 'serverPrefix')
+ if server_prefix:
+ rtmp_format_info = format_info.copy()
+ rtmp_format_info.update({
+ 'url': server_prefix,
+ 'play_path': xpath_text(asset, 'fileName'),
+ 'format_id': 'rtmp-%s' % asset_type,
+ })
+ formats.append(rtmp_format_info)
+ self._sort_formats(formats)
+ return formats
+
+ def _extract_thumbnails(self, variants, base_url):
+ thumbnails = [{
+ 'url': base_url + xpath_text(variant, 'url'),
+ 'width': int_or_none(xpath_text(variant, 'width')),
+ 'height': int_or_none(xpath_text(variant, 'height')),
+ } for variant in variants.findall('variant') if xpath_text(variant, 'url')]
+ thumbnails.sort(key=lambda x: (x['width'] or 0) * (x['height'] or 0), reverse=True)
+ return thumbnails
+
+
+class BRMediathekIE(InfoExtractor):
+ IE_DESC = 'Bayerischer Rundfunk Mediathek'
+ _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})'
+
+ _TESTS = [{
+ 'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
+ 'md5': 'fdc3d485835966d1622587d08ba632ec',
+ 'info_dict': {
+ 'id': 'av:5a1e6a6e8fce6d001871cc8e',
+ 'ext': 'mp4',
+ 'title': 'Die Sendung vom 28.11.2017',
+ 'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
+ 'timestamp': 1511942766,
+ 'upload_date': '20171129',
+ }
+ }]
+
+ def _real_extract(self, url):
+ clip_id = self._match_id(url)
+
+ clip = self._download_json(
+ 'https://proxy-base.master.mango.express/graphql',
+ clip_id, data=json.dumps({
+ "query": """{
+ viewer {
+ clip(id: "%s") {
+ title
+ description
+ duration
+ createdAt
+ ageRestriction
+ videoFiles {
+ edges {
+ node {
+ publicLocation
+ fileSize
+ videoProfile {
+ width
+ height
+ bitrate
+ encoding
+ }
+ }
+ }
+ }
+ captionFiles {
+ edges {
+ node {
+ publicLocation
+ }
+ }
+ }
+ teaserImages {
+ edges {
+ node {
+ imageFiles {
+ edges {
+ node {
+ publicLocation
+ width
+ height
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}""" % clip_id}).encode(), headers={
+ 'Content-Type': 'application/json',
+ })['data']['viewer']['clip']
+ title = clip['title']
+
+ formats = []
+ for edge in clip.get('videoFiles', {}).get('edges', []):
+ node = edge.get('node', {})
+ n_url = node.get('publicLocation')
+ if not n_url:
+ continue
+ ext = determine_ext(n_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ n_url, clip_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ video_profile = node.get('videoProfile', {})
+ tbr = int_or_none(video_profile.get('bitrate'))
+ format_id = 'http'
+ if tbr:
+ format_id += '-%d' % tbr
+ formats.append({
+ 'format_id': format_id,
+ 'url': n_url,
+ 'width': int_or_none(video_profile.get('width')),
+ 'height': int_or_none(video_profile.get('height')),
+ 'tbr': tbr,
+ 'filesize': int_or_none(node.get('fileSize')),
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for edge in clip.get('captionFiles', {}).get('edges', []):
+ node = edge.get('node', {})
+ n_url = node.get('publicLocation')
+ if not n_url:
+ continue
+ subtitles.setdefault('de', []).append({
+ 'url': n_url,
+ })
+
+ thumbnails = []
+ for edge in clip.get('teaserImages', {}).get('edges', []):
+ for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []):
+ node = image_edge.get('node', {})
+ n_url = node.get('publicLocation')
+ if not n_url:
+ continue
+ thumbnails.append({
+ 'url': n_url,
+ 'width': int_or_none(node.get('width')),
+ 'height': int_or_none(node.get('height')),
+ })
+
+ return {
+ 'id': clip_id,
+ 'title': title,
+ 'description': clip.get('description'),
+ 'duration': int_or_none(clip.get('duration')),
+ 'timestamp': parse_iso8601(clip.get('createdAt')),
+ 'age_limit': int_or_none(clip.get('ageRestriction')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ }
diff --git a/hypervideo_dl/extractor/bravotv.py b/hypervideo_dl/extractor/bravotv.py
new file mode 100644
index 0000000..bae2aed
--- /dev/null
+++ b/hypervideo_dl/extractor/bravotv.py
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .adobepass import AdobePassIE
+from ..utils import (
+ smuggle_url,
+ update_url_query,
+ int_or_none,
+)
+
+
+class BravoTVIE(AdobePassIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
+ 'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
+ 'info_dict': {
+ 'id': 'epL0pmK1kQlT',
+ 'ext': 'mp4',
+ 'title': 'The Top Chef Season 16 Winner Is...',
+ 'description': 'Find out who takes the title of Top Chef!',
+ 'uploader': 'NBCU-BRAV',
+ 'upload_date': '20190314',
+ 'timestamp': 1552591860,
+ }
+ }, {
+ 'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ site, display_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, display_id)
+ settings = self._parse_json(self._search_regex(
+ r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
+ display_id)
+ info = {}
+ query = {
+ 'mbr': 'true',
+ }
+ account_pid, release_pid = [None] * 2
+ tve = settings.get('ls_tve')
+ if tve:
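+ # TV-Everywhere clip: take the ThePlatform account/release IDs
+ # from the pdk-player embed, falling back to a hardcoded account PID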
+ query['manifest'] = 'm3u'
+ mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage)
+ if mobj:
+ account_pid, tp_path = mobj.groups()
+ release_pid = tp_path.strip('/').split('/')[-1]
+ else:
+ account_pid = 'HNK2IC'
+ tp_path = release_pid = tve['release_pid']
+ if tve.get('entitlement') == 'auth':
+ adobe_pass = settings.get('tve_adobe_auth', {})
+ if site == 'bravotv':
+ site = 'bravo'
+ resource = self._get_mvpd_resource(
+ adobe_pass.get('adobePassResourceId') or site,
+ tve['title'], release_pid, tve.get('rating'))
+ query['auth'] = self._extract_mvpd_auth(
+ url, release_pid,
+ adobe_pass.get('adobePassRequestorId') or site, resource)
+ else:
+ shared_playlist = settings['ls_playlist']
+ account_pid = shared_playlist['account_pid']
+ metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
+ tp_path = release_pid = metadata.get('release_pid')
+ if not release_pid:
+ release_pid = metadata['guid']
+ tp_path = 'media/guid/2140479951/' + release_pid
+ info.update({
+ 'title': metadata['title'],
+ 'description': metadata.get('description'),
+ 'season_number': int_or_none(metadata.get('season_num')),
+ 'episode_number': int_or_none(metadata.get('episode_num')),
+ })
+ query['switch'] = 'progressive'
+ info.update({
+ '_type': 'url_transparent',
+ 'id': release_pid,
+ 'url': smuggle_url(update_url_query(
+ 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path),
+ query), {'force_smil_url': True}),
+ 'ie_key': 'ThePlatform',
+ })
+ return info
diff --git a/hypervideo_dl/extractor/breakcom.py b/hypervideo_dl/extractor/breakcom.py
new file mode 100644
index 0000000..68c7cf2
--- /dev/null
+++ b/hypervideo_dl/extractor/breakcom.py
@@ -0,0 +1,91 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ int_or_none,
+ url_or_none,
+)
+
+
+class BreakIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
+ _TESTS = [{
+ 'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
+ 'info_dict': {
+ 'id': '2468056',
+ 'ext': 'mp4',
+ 'title': 'When Girls Act Like D-Bags',
+ 'age_limit': 13,
+ },
+ }, {
+ # youtube embed
+ 'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work',
+ 'info_dict': {
+ 'id': 'RrrDLdeL2HQ',
+ 'ext': 'mp4',
+ 'title': 'Whale Watching Boat Crashing Into San Diego Dock',
+ 'description': 'md5:afc1b2772f0a8468be51dd80eb021069',
+ 'upload_date': '20160331',
+ 'uploader': 'Steve Holden',
+ 'uploader_id': 'sdholden07',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id, video_id = re.match(self._VALID_URL, url).groups()
+
+ webpage = self._download_webpage(url, display_id)
+
+ youtube_url = YoutubeIE._extract_url(webpage)
+ if youtube_url:
+ return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
+
+ content = self._parse_json(
+ self._search_regex(
+ r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage,
+ 'content'),
+ display_id)
+
+ formats = []
+ for video in content:
+ video_url = url_or_none(video.get('url'))
+ if not video_url:
+ continue
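+ # progressive URLs carry the bitrate as '<n>_kbps' in the URL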
+ bitrate = int_or_none(self._search_regex(
+ r'(\d+)_kbps', video_url, 'tbr', default=None))
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http-%d' % bitrate if bitrate else 'http',
+ 'tbr': bitrate,
+ })
+ self._sort_formats(formats)
+
+ title = self._search_regex(
+ (r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ r'<h1[^>]*>(?P<value>[^<]+)'), webpage, 'title', group='value')
+
+ def get(key, name):
+ return int_or_none(self._search_regex(
+ r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name,
+ default=None))
+
+ age_limit = get('ratings', 'age limit')
+ video_id = video_id or get('pid', 'video id') or display_id
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/brightcove.py b/hypervideo_dl/extractor/brightcove.py
new file mode 100644
index 0000000..6022076
--- /dev/null
+++ b/hypervideo_dl/extractor/brightcove.py
@@ -0,0 +1,681 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import re
+import struct
+
+from .adobepass import AdobePassIE
+from .common import InfoExtractor
+from ..compat import (
+ compat_etree_fromstring,
+ compat_HTTPError,
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+ compat_urlparse,
+ compat_xml_parse_error,
+)
+from ..utils import (
+ clean_html,
+ extract_attributes,
+ ExtractorError,
+ find_xpath_attr,
+ fix_xml_ampersands,
+ float_or_none,
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ parse_iso8601,
+ smuggle_url,
+ str_or_none,
+ try_get,
+ unescapeHTML,
+ unsmuggle_url,
+ UnsupportedError,
+ update_url_query,
+ url_or_none,
+)
+
+
+class BrightcoveLegacyIE(InfoExtractor):
+ IE_NAME = 'brightcove:legacy'
+ _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
+
+ _TESTS = [
+ {
+ # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
+ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
+ 'md5': '5423e113865d26e40624dce2e4b45d95',
+ 'note': 'Test Brightcove downloads and detection in GenericIE',
+ 'info_dict': {
+ 'id': '2371591881001',
+ 'ext': 'mp4',
+ 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
+ 'uploader': '8TV',
+ 'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
+ 'timestamp': 1368213670,
+ 'upload_date': '20130510',
+ 'uploader_id': '1589608506001',
+ },
+ 'skip': 'The player has been deactivated by the content owner',
+ },
+ {
+ # From http://medianetwork.oracle.com/video/player/1785452137001
+ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
+ 'info_dict': {
+ 'id': '1785452137001',
+ 'ext': 'flv',
+ 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
+ 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',
+ 'uploader': 'Oracle',
+ 'timestamp': 1344975024,
+ 'upload_date': '20120814',
+ 'uploader_id': '1460825906',
+ },
+ 'skip': 'video not playable',
+ },
+ {
+ # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
+ 'url': 'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
+ 'info_dict': {
+ 'id': '2750934548001',
+ 'ext': 'mp4',
+ 'title': 'This Bracelet Acts as a Personal Thermostat',
+ 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
+ # 'uploader': 'Mashable',
+ 'timestamp': 1382041798,
+ 'upload_date': '20131017',
+ 'uploader_id': '1130468786001',
+ },
+ },
+ {
+ # test that the default referer works
+ # from http://national.ballet.ca/interact/video/Lost_in_Motion_II/
+ 'url': 'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001',
+ 'info_dict': {
+ 'id': '2878862109001',
+ 'ext': 'mp4',
+ 'title': 'Lost in Motion II',
+ 'description': 'md5:363109c02998fee92ec02211bd8000df',
+ 'uploader': 'National Ballet of Canada',
+ },
+ 'skip': 'Video gone',
+ },
+ {
+ # test flv videos served by akamaihd.net
+ # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william
+ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3Aevent-stream-356&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
+ # The md5 checksum changes on each download
+ 'info_dict': {
+ 'id': '3750436379001',
+ 'ext': 'flv',
+ 'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
+ 'uploader': 'RBTV Old (do not use)',
+ 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
+ 'timestamp': 1409122195,
+ 'upload_date': '20140827',
+ 'uploader_id': '710858724001',
+ },
+ 'skip': 'Video gone',
+ },
+ {
+ # playlist with 'videoList'
+ # from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
+ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
+ 'info_dict': {
+ 'title': 'Sealife',
+ 'id': '3550319591001',
+ },
+ 'playlist_mincount': 7,
+ 'skip': 'Unsupported URL',
+ },
+ {
+ # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
+ 'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
+ 'info_dict': {
+ 'id': '1522758701001',
+ 'title': 'Lesson 08',
+ },
+ 'playlist_mincount': 10,
+ 'skip': 'Unsupported URL',
+ },
+ {
+ # playerID inferred from bcpid
+ # from http://www.un.org/chinese/News/story.asp?NewsID=27724
+ 'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
+ 'only_matching': True, # Tested in GenericIE
+ }
+ ]
+
+ @classmethod
+ def _build_brightcove_url(cls, object_str):
+ """
+ Build a Brightcove URL from an XML string containing
+ <object class="BrightcoveExperience">{params}</object>
+ """
+
+ # Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553
+ object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
+ lambda m: m.group(1) + '/>', object_str)
+ # Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608
+ object_str = object_str.replace('<--', '<!--')
+ # remove namespace to simplify extraction
+ object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
+ object_str = fix_xml_ampersands(object_str)
+
+ try:
+ object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
+ except compat_xml_parse_error:
+ return
+
+ fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
+ if fv_el is not None:
+ flashvars = dict(
+ (k, v[0])
+ for k, v in compat_parse_qs(fv_el.attrib['value']).items())
+ else:
+ flashvars = {}
+
+ data_url = object_doc.attrib.get('data', '')
+ data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query)
+
+ def find_param(name):
+ if name in flashvars:
+ return flashvars[name]
+ node = find_xpath_attr(object_doc, './param', 'name', name)
+ if node is not None:
+ return node.attrib['value']
+ return data_url_params.get(name)
+
+ params = {}
+
+ playerID = find_param('playerID') or find_param('playerId')
+ if playerID is None:
+ raise ExtractorError('Cannot find player ID')
+ params['playerID'] = playerID
+
+ playerKey = find_param('playerKey')
+ # Not all pages define this value
+ if playerKey is not None:
+ params['playerKey'] = playerKey
+ # These fields hold the id of the video
+ videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
+ if videoPlayer is not None:
+ if isinstance(videoPlayer, list):
+ videoPlayer = videoPlayer[0]
+ videoPlayer = videoPlayer.strip()
+ # UUID is also possible for videoPlayer (e.g.
+ # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd
+ # or http://www8.hp.com/cn/zh/home.html)
+ if not (re.match(
+ r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$',
+ videoPlayer) or videoPlayer.startswith('ref:')):
+ return None
+ params['@videoPlayer'] = videoPlayer
+ linkBase = find_param('linkBaseURL')
+ if linkBase is not None:
+ params['linkBaseURL'] = linkBase
+ return cls._make_brightcove_url(params)
+
+ @classmethod
+ def _build_brightcove_url_from_js(cls, object_js):
+ # The layout of JS is as follows:
+ # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
+ # // build Brightcove <object /> XML
+ # }
+ m = re.search(
+ r'''(?x)customBC\.createVideo\(
+ .*? # skipping width and height
+ ["\'](?P<playerID>\d+)["\']\s*,\s* # playerID
+ ["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters
+ # in length, however it's appended to itself
+ # in places, so truncate
+ ["\'](?P<videoID>\d+)["\'] # @videoPlayer
+ ''', object_js)
+ if m:
+ return cls._make_brightcove_url(m.groupdict())
+
+ @classmethod
+ def _make_brightcove_url(cls, params):
+ return update_url_query(
+ 'http://c.brightcove.com/services/viewer/htmlFederated', params)
+
+ @classmethod
+ def _extract_brightcove_url(cls, webpage):
+ """Try to extract the brightcove url from the webpage, returns None
+ if it can't be found
+ """
+ urls = cls._extract_brightcove_urls(webpage)
+ return urls[0] if urls else None
+
+ @classmethod
+ def _extract_brightcove_urls(cls, webpage):
+ """Return a list of all Brightcove URLs from the webpage """
+
+ url_m = re.search(
+ r'''(?x)
+ <meta\s+
+ (?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+
+ content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
+ ''', webpage)
+ if url_m:
+ url = unescapeHTML(url_m.group('url'))
+ # Some sites don't add it; we can't download with such a URL, for example:
+ # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
+ if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
+ return [url]
+
+ matches = re.findall(
+ r'''(?sx)<object
+ (?:
+ [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
+ [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
+ ).+?>\s*</object>''',
+ webpage)
+ if matches:
+ return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
+
+ matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
+ if matches:
+ return list(filter(None, [
+ cls._build_brightcove_url_from_js(custom_bc)
+ for custom_bc in matches]))
+ return [src for _, src in re.findall(
+ r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ # Change 'videoId' and similar fields to '@videoPlayer'
+ url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
+ # Change bckey (used by bcove.me urls) to playerKey
+ url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
+ mobj = re.match(self._VALID_URL, url)
+ query_str = mobj.group('query')
+ query = compat_urlparse.parse_qs(query_str)
+
+ videoPlayer = query.get('@videoPlayer')
+ if videoPlayer:
+ # We set the original url as the default 'Referer' header
+ referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
+ video_id = videoPlayer[0]
+ if 'playerID' not in query:
+ mobj = re.search(r'/bcpid(\d+)', url)
+ if mobj is not None:
+ query['playerID'] = [mobj.group(1)]
+ publisher_id = query.get('publisherId')
+ if publisher_id and publisher_id[0].isdigit():
+ publisher_id = publisher_id[0]
+ if not publisher_id:
+ player_key = query.get('playerKey')
+ if player_key and ',' in player_key[0]:
+ player_key = player_key[0]
+ else:
+ player_id = query.get('playerID')
+ if player_id and player_id[0].isdigit():
+ headers = {}
+ if referer:
+ headers['Referer'] = referer
+ player_page = self._download_webpage(
+ 'http://link.brightcove.com/services/player/bcpid' + player_id[0],
+ video_id, headers=headers, fatal=False)
+ if player_page:
+ player_key = self._search_regex(
+ r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
+ player_page, 'player key', fatal=False)
+ if player_key:
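+ # the second of the playerKey's comma-separated base64url
+ # fields ('~' stands in for '=' padding) decodes to a
+ # big-endian 64-bit integer: the numeric publisher ID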
+ enc_pub_id = player_key.split(',')[1].replace('~', '=')
+ publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
+ if publisher_id:
+ brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
+ if referer:
+ brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
+ return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
+ # TODO: figure out if it's possible to extract playlistId from playerKey
+ # elif 'playerKey' in query:
+ # player_key = query['playerKey']
+ # return self._get_playlist_info(player_key[0])
+ raise UnsupportedError(url)
+
+
+class BrightcoveNewIE(AdobePassIE):
+ IE_NAME = 'brightcove:new'
+ _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)'
+ _TESTS = [{
+ 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
+ 'md5': 'c8100925723840d4b0d243f7025703be',
+ 'info_dict': {
+ 'id': '4463358922001',
+ 'ext': 'mp4',
+ 'title': 'Meet the man behind Popcorn Time',
+ 'description': 'md5:eac376a4fe366edc70279bfb681aea16',
+ 'duration': 165.768,
+ 'timestamp': 1441391203,
+ 'upload_date': '20150904',
+ 'uploader_id': '929656772001',
+ 'formats': 'mincount:20',
+ },
+ }, {
+ # with rtmp streams
+ 'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001',
+ 'info_dict': {
+ 'id': '4279049078001',
+ 'ext': 'mp4',
+ 'title': 'Titansgrave: Chapter 0',
+ 'description': 'Titansgrave: Chapter 0',
+ 'duration': 1242.058,
+ 'timestamp': 1433556729,
+ 'upload_date': '20150606',
+ 'uploader_id': '4036320279001',
+ 'formats': 'mincount:39',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # playlist stream
+ 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
+ 'info_dict': {
+ 'id': '5718313430001',
+ 'title': 'No Audio Playlist',
+ },
+ 'playlist_count': 7,
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001',
+ 'only_matching': True,
+ }, {
+ # ref:-prefixed video id
+ 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
+ 'only_matching': True,
+ }, {
+ # non-numeric ref:-prefixed video id
+ 'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
+ 'only_matching': True,
+ }, {
+ # unavailable video without message but with error_code
+ 'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(ie, webpage):
+ urls = BrightcoveNewIE._extract_urls(ie, webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
+ def _extract_urls(ie, webpage):
+ # Reference:
+ # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
+ # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag
+ # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
+ # 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html
+ # 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
+
+ entries = []
+
+ # Look for iframe embeds [1]
+ for _, url in re.findall(
+ r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
+ entries.append(url if url.startswith('http') else 'http:' + url)
+
+ # Look for <video> tags [2] and embed_in_page embeds [3]
+ # [2] looks like:
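+ # <video data-video-id="4463358922001" data-account="929656772001"
+ # data-player="default" data-embed="default" class="video-js"></video>
+ # optionally followed by its player script:
+ # <script src="//players.brightcove.net/929656772001/default_default/index.min.js"></script>
+ # (a representative embed; real pages vary in attribute order and values)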
+ for video, script_tag, account_id, player_id, embed in re.findall(
+ r'''(?isx)
+ (<video(?:-js)?\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
+ (?:.*?
+ (<script[^>]+
+ src=["\'](?:https?:)?//players\.brightcove\.net/
+ (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
+ )
+ )?
+ ''', webpage):
+ attrs = extract_attributes(video)
+
+ # According to the examples in [4] it's unclear whether the video id
+ # may be optional and what to do when it is missing
+ video_id = attrs.get('data-video-id')
+ if not video_id:
+ continue
+
+ account_id = account_id or attrs.get('data-account')
+ if not account_id:
+ continue
+
+ player_id = player_id or attrs.get('data-player') or 'default'
+ embed = embed or attrs.get('data-embed') or 'default'
+
+ bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (
+ account_id, player_id, embed, video_id)
+
+ # Some brightcove videos may be embedded with video tag only and
+ # without script tag or any mentioning of brightcove at all. Such
+ # embeds are considered ambiguous since they are matched based only
+ # on data-video-id and data-account attributes and in the wild may
+ # not be brightcove embeds at all. Let's check reconstructed
+ # brightcove URLs in case of such embeds and only process valid
+ # ones. By this we ensure there is indeed a brightcove embed.
+ if not script_tag and not ie._is_valid_url(
+ bc_url, video_id, 'possible brightcove video'):
+ continue
+
+ entries.append(bc_url)
+
+ return entries
+
+ def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
+ title = json_data['name'].strip()
+
+ num_drm_sources = 0
+ formats = []
+ sources = json_data.get('sources') or []
+ for source in sources:
+ container = source.get('container')
+ ext = mimetype2ext(source.get('type'))
+ src = source.get('src')
+ # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
+ if container == 'WVM' or source.get('key_systems'):
+ num_drm_sources += 1
+ continue
+ elif ext == 'ism':
+ continue
+ elif ext == 'm3u8' or container == 'M2TS':
+ if not src:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ if not src:
+ continue
+ formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
+ else:
+ streaming_src = source.get('streaming_src')
+ stream_name, app_name = source.get('stream_name'), source.get('app_name')
+ if not src and not streaming_src and (not stream_name or not app_name):
+ continue
+ tbr = float_or_none(source.get('avg_bitrate'), 1000)
+ height = int_or_none(source.get('height'))
+ width = int_or_none(source.get('width'))
+ f = {
+ 'tbr': tbr,
+ 'filesize': int_or_none(source.get('size')),
+ 'container': container,
+ 'ext': ext or container.lower(),
+ }
+ if width == 0 and height == 0:
+ f.update({
+ 'vcodec': 'none',
+ })
+ else:
+ f.update({
+ 'width': width,
+ 'height': height,
+ 'vcodec': source.get('codec'),
+ })
+
+ def build_format_id(kind):
+ format_id = kind
+ if tbr:
+ format_id += '-%dk' % int(tbr)
+ if height:
+ format_id += '-%dp' % height
+ return format_id
+
+ if src or streaming_src:
+ f.update({
+ 'url': src or streaming_src,
+ 'format_id': build_format_id('http' if src else 'http-streaming'),
+ 'source_preference': 0 if src else -1,
+ })
+ else:
+ f.update({
+ 'url': app_name,
+ 'play_path': stream_name,
+ 'format_id': build_format_id('rtmp'),
+ })
+ formats.append(f)
+
+ if not formats:
+ errors = json_data.get('errors')
+ if errors:
+ error = errors[0]
+ raise ExtractorError(
+ error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
+ if sources and num_drm_sources == len(sources):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
+ self._sort_formats(formats)
+
+ for f in formats:
+ f.setdefault('http_headers', {}).update(headers)
+
+ subtitles = {}
+ for text_track in json_data.get('text_tracks', []):
+ if text_track.get('kind') != 'captions':
+ continue
+ text_track_url = url_or_none(text_track.get('src'))
+ if not text_track_url:
+ continue
+ lang = (str_or_none(text_track.get('srclang'))
+ or str_or_none(text_track.get('label')) or 'en').lower()
+ subtitles.setdefault(lang, []).append({
+ 'url': text_track_url,
+ })
+
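+ # a non-positive reported duration is taken as the live-stream marker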
+ is_live = False
+ duration = float_or_none(json_data.get('duration'), 1000)
+ if duration is not None and duration <= 0:
+ is_live = True
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': clean_html(json_data.get('description')),
+ 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
+ 'duration': duration,
+ 'timestamp': parse_iso8601(json_data.get('published_at')),
+ 'uploader_id': json_data.get('account_id'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'tags': json_data.get('tags', []),
+ 'is_live': is_live,
+ }
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ 'ip_blocks': smuggled_data.get('geo_ip_blocks'),
+ })
+
+ account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
+
+ policy_key_id = '%s_%s' % (account_id, player_id)
+ policy_key = self._downloader.cache.load('brightcove', policy_key_id)
+ policy_key_extracted = False
+ store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
+
+ def extract_policy_key():
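+ # the key normally sits in the player's config.json; failing that
+ # it is scraped from index.min.js, either inside a catalog({...})
+ # argument or as a bare policyKey literal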
+ base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
+ config = self._download_json(
+ base_url + 'config.json', video_id, fatal=False) or {}
+ policy_key = try_get(
+ config, lambda x: x['video_cloud']['policy_key'])
+ if not policy_key:
+ webpage = self._download_webpage(
+ base_url + 'index.min.js', video_id)
+
+ catalog = self._search_regex(
+ r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
+ if catalog:
+ catalog = self._parse_json(
+ js_to_json(catalog), video_id, fatal=False)
+ if catalog:
+ policy_key = catalog.get('policyKey')
+
+ if not policy_key:
+ policy_key = self._search_regex(
+ r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
+ webpage, 'policy key', group='pk')
+
+ store_pk(policy_key)
+ return policy_key
+
+ api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
+ headers = {}
+ referrer = smuggled_data.get('referrer')
+ if referrer:
+ headers.update({
+ 'Referer': referrer,
+ 'Origin': re.search(r'https?://[^/]+', referrer).group(0),
+ })
+
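+ # at most two passes: if a cached policy key turns out stale
+ # (INVALID_POLICY_KEY), it is dropped and re-extracted once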
+ for _ in range(2):
+ if not policy_key:
+ policy_key = extract_policy_key()
+ policy_key_extracted = True
+ headers['Accept'] = 'application/json;pk=%s' % policy_key
+ try:
+ json_data = self._download_json(api_url, video_id, headers=headers)
+ break
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
+ json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
+ message = json_data.get('message') or json_data['error_code']
+ if json_data.get('error_subcode') == 'CLIENT_GEO':
+ self.raise_geo_restricted(msg=message)
+ elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted:
+ policy_key = None
+ store_pk(None)
+ continue
+ raise ExtractorError(message, expected=True)
+ raise
+
+ errors = json_data.get('errors')
+ if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
+ custom_fields = json_data['custom_fields']
+ tve_token = self._extract_mvpd_auth(
+ smuggled_data['source_url'], video_id,
+ custom_fields['bcadobepassrequestorid'],
+ custom_fields['bcadobepassresourceid'])
+ json_data = self._download_json(
+ api_url, video_id, headers={
+ 'Accept': 'application/json;pk=%s' % policy_key
+ }, query={
+ 'tveToken': tve_token,
+ })
+
+ if content_type == 'playlist':
+ return self.playlist_result(
+ [self._parse_brightcove_metadata(vid, vid.get('id'), headers)
+ for vid in json_data.get('videos', []) if vid.get('id')],
+ json_data.get('id'), json_data.get('name'),
+ json_data.get('description'))
+
+ return self._parse_brightcove_metadata(
+ json_data, video_id, headers=headers)
diff --git a/hypervideo_dl/extractor/businessinsider.py b/hypervideo_dl/extractor/businessinsider.py
new file mode 100644
index 0000000..73a57b1
--- /dev/null
+++ b/hypervideo_dl/extractor/businessinsider.py
@@ -0,0 +1,48 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+
+
+class BusinessInsiderIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
+ 'md5': 'ffed3e1e12a6f950aa2f7d83851b497a',
+ 'info_dict': {
+ 'id': 'cjGDb0X9',
+ 'ext': 'mp4',
+ 'title': "Bananas give you more radiation exposure than living next to a nuclear power plant",
+ 'description': 'md5:0175a3baf200dd8fa658f94cade841b3',
+ 'upload_date': '20160611',
+ 'timestamp': 1465675620,
+ },
+ }, {
+ 'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
+ 'md5': '43f438dbc6da0b89f5ac42f68529d84a',
+ 'info_dict': {
+ 'id': '5zJwd4FK',
+ 'ext': 'mp4',
+ 'title': 'Deze dingen zorgen ervoor dat je minder snel een date scoort',
+ 'description': 'md5:2af8975825d38a4fed24717bbe51db49',
+ 'upload_date': '20170705',
+ 'timestamp': 1499270528,
+ },
+ }, {
+ 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ jwplatform_id = self._search_regex(
+ (r'data-media-id=["\']([a-zA-Z0-9]{8})',
+ r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
+ r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})',
+ r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})'),
+ webpage, 'jwplatform id')
+ return self.url_result(
+ 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
+ video_id=video_id)
diff --git a/hypervideo_dl/extractor/buzzfeed.py b/hypervideo_dl/extractor/buzzfeed.py
new file mode 100644
index 0000000..ec41109
--- /dev/null
+++ b/hypervideo_dl/extractor/buzzfeed.py
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from .facebook import FacebookIE
+
+
+class BuzzFeedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)'
+ _TESTS = [{
+ 'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia',
+ 'info_dict': {
+ 'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss',
+ 'title': 'This Angry Ram Destroys A Punching Bag Like A Boss',
+ 'description': 'Rambro!',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'aVCR29aE_OQ',
+ 'ext': 'mp4',
+ 'title': 'Angry Ram destroys a punching bag..',
+ 'description': 'md5:c59533190ef23fd4458a5e8c8c872345',
+ 'upload_date': '20141024',
+ 'uploader_id': 'Buddhanz1',
+ 'uploader': 'Angry Ram',
+ }
+ }]
+ }, {
+ 'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia',
+ 'params': {
+ 'skip_download': True, # Got enough YouTube download tests
+ },
+ 'info_dict': {
+ 'id': 'look-at-this-cute-dog-omg',
+ 'description': 're:Munchkin the Teddy Bear is back ?!',
+ 'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'mVmBL8B-In0',
+ 'ext': 'mp4',
+ 'title': 're:Munchkin the Teddy Bear gets her exercise',
+ 'description': 'md5:28faab95cda6e361bcff06ec12fc21d8',
+ 'upload_date': '20141124',
+ 'uploader_id': 'CindysMunchkin',
+ 'uploader': 're:^Munchkin the',
+ },
+ }]
+ }, {
+ 'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK',
+ 'info_dict': {
+ 'id': 'the-most-adorable-crash-landing-ever',
+ 'title': 'Watch This Baby Goose Make The Most Adorable Crash Landing',
+ 'description': 'This gosling knows how to stick a landing.',
+ },
+ 'playlist': [{
+ 'md5': '763ca415512f91ca62e4621086900a23',
+ 'info_dict': {
+ 'id': '971793786185728',
+ 'ext': 'mp4',
+ 'title': 'We set up crash pads so that the goslings on our roof would have a safe landi...',
+ 'uploader': 'Calgary Outdoor Centre-University of Calgary',
+ },
+ }],
+ 'add_ie': ['Facebook'],
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
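+ # each clip's source metadata is serialized JSON in the
+ # rel:bf_bucket_data attribute of its video-embed div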
+ all_buckets = re.findall(
+ r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'',
+ webpage)
+
+ entries = []
+ for bd_json in all_buckets:
+ bd = json.loads(bd_json)
+ video = bd.get('video') or bd.get('progload_video')
+ if not video:
+ continue
+ entries.append(self.url_result(video['url']))
+
+ facebook_urls = FacebookIE._extract_urls(webpage)
+ entries.extend([
+ self.url_result(facebook_url)
+ for facebook_url in facebook_urls])
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'entries': entries,
+ }
diff --git a/hypervideo_dl/extractor/byutv.py b/hypervideo_dl/extractor/byutv.py
new file mode 100644
index 0000000..0b11bf1
--- /dev/null
+++ b/hypervideo_dl/extractor/byutv.py
@@ -0,0 +1,117 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ merge_dicts,
+ parse_duration,
+ url_or_none,
+)
+
+
+class BYUtvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
+ _TESTS = [{
+ # ooyalaVOD
+ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
+ 'info_dict': {
+ 'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
+ 'display_id': 'studio-c-season-5-episode-5',
+ 'ext': 'mp4',
+ 'title': 'Season 5 Episode 5',
+ 'description': 'md5:1d31dc18ef4f075b28f6a65937d22c65',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 1486.486,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Ooyala'],
+ }, {
+ # dvr
+ 'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2',
+ 'info_dict': {
+ 'id': '8f1dab9b-b243-47c8-b525-3e2d021a3451',
+ 'display_id': 'byu-softball-pacific-vs-byu-41219---game-2',
+ 'ext': 'mp4',
+ 'title': 'Pacific vs. BYU (4/12/19)',
+ 'description': 'md5:1ac7b57cb9a78015910a4834790ce1f3',
+ 'duration': 11645,
+ },
+ 'params': {
+ 'skip_download': True
+ },
+ }, {
+ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.byutv.org/player/27741493-dc83-40b0-8420-e7ae38a2ae98/byu-football-toledo-vs-byu-93016?listid=4fe0fee5-0d3c-4a29-b725-e4948627f472&listindex=0&q=toledo',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ video = self._download_json(
+ 'https://api.byutv.org/api3/catalog/getvideosforcontent',
+ display_id, query={
+ 'contentid': video_id,
+ 'channel': 'byutv',
+ 'x-byutv-context': 'web$US',
+ }, headers={
+ 'x-byutv-context': 'web$US',
+ 'x-byutv-platformkey': 'xsaaw9c7y5',
+ })
+
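+        # Ooyala-hosted VOD episodes are delegated to the Ooyala extractor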
+ ep = video.get('ooyalaVOD')
+ if ep:
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'Ooyala',
+ 'url': 'ooyala:%s' % ep['providerId'],
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': ep.get('title'),
+ 'description': ep.get('description'),
+ 'thumbnail': ep.get('imageThumbnail'),
+ }
+
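+        # Otherwise the response maps format ids (e.g. 'dvr') to dicts carrying a videoUrl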
+ info = {}
+ formats = []
+ for format_id, ep in video.items():
+ if not isinstance(ep, dict):
+ continue
+ video_url = url_or_none(ep.get('videoUrl'))
+ if not video_url:
+ continue
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ })
+ merge_dicts(info, {
+ 'title': ep.get('title'),
+ 'description': ep.get('description'),
+ 'thumbnail': ep.get('imageThumbnail'),
+ 'duration': parse_duration(ep.get('length')),
+ })
+ self._sort_formats(formats)
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': display_id,
+ 'formats': formats,
+ })
diff --git a/hypervideo_dl/extractor/c56.py b/hypervideo_dl/extractor/c56.py
new file mode 100644
index 0000000..cac8fdc
--- /dev/null
+++ b/hypervideo_dl/extractor/c56.py
@@ -0,0 +1,65 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class C56IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
+ IE_NAME = '56.com'
+ _TESTS = [{
+ 'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
+ 'md5': 'e59995ac63d0457783ea05f93f12a866',
+ 'info_dict': {
+ 'id': '93440716',
+ 'ext': 'flv',
+ 'title': '网事知多少 第32期:车怒',
+ 'duration': 283.813,
+ },
+ }, {
+ 'url': 'http://www.56.com/u47/v_MTM5NjQ5ODc2.html',
+ 'md5': '',
+ 'info_dict': {
+ 'id': '82247482',
+ 'title': '爱的诅咒之杜鹃花开',
+ },
+ 'playlist_count': 7,
+ 'add_ie': ['Sohu'],
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+ text_id = mobj.group('textid')
+
+ webpage = self._download_webpage(url, text_id)
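+        # Some 56.com pages merely wrap a Sohu video; delegate those to the Sohu extractor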
+ sohu_video_info_str = self._search_regex(
+ r'var\s+sohuVideoInfo\s*=\s*({[^}]+});', webpage, 'Sohu video info', default=None)
+ if sohu_video_info_str:
+ sohu_video_info = self._parse_json(
+ sohu_video_info_str, text_id, transform_source=js_to_json)
+ return self.url_result(sohu_video_info['url'], 'Sohu')
+
+ page = self._download_json(
+ 'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
+
+ info = page['info']
+
+ formats = [
+ {
+ 'format_id': f['type'],
+ 'filesize': int(f['filesize']),
+ 'url': f['url']
+ } for f in info['rfiles']
+ ]
+ self._sort_formats(formats)
+
+ return {
+ 'id': info['vid'],
+ 'title': info['Subject'],
+ 'duration': int(info['duration']) / 1000.0,
+ 'formats': formats,
+ 'thumbnail': info.get('bimg') or info.get('img'),
+ }
diff --git a/hypervideo_dl/extractor/camdemy.py b/hypervideo_dl/extractor/camdemy.py
new file mode 100644
index 0000000..8f0c6c5
--- /dev/null
+++ b/hypervideo_dl/extractor/camdemy.py
@@ -0,0 +1,161 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlencode,
+ compat_urlparse,
+)
+from ..utils import (
+ clean_html,
+ parse_duration,
+ str_to_int,
+ unified_strdate,
+)
+
+
+class CamdemyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
+ _TESTS = [{
+ # single file
+ 'url': 'http://www.camdemy.com/media/5181/',
+ 'md5': '5a5562b6a98b37873119102e052e311b',
+ 'info_dict': {
+ 'id': '5181',
+ 'ext': 'mp4',
+ 'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'creator': 'ss11spring',
+ 'duration': 1591,
+ 'upload_date': '20130114',
+ 'view_count': int,
+ }
+ }, {
+ # With non-empty description
+ # webpage returns "No permission or not login"
+ 'url': 'http://www.camdemy.com/media/13885',
+ 'md5': '4576a3bb2581f86c61044822adbd1249',
+ 'info_dict': {
+ 'id': '13885',
+ 'ext': 'mp4',
+ 'title': 'EverCam + Camdemy QuickStart',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
+ 'creator': 'evercam',
+ 'duration': 318,
+ }
+ }, {
+ # External source (YouTube)
+ 'url': 'http://www.camdemy.com/media/14842',
+ 'info_dict': {
+ 'id': '2vsYQzNIsJo',
+ 'ext': 'mp4',
+ 'title': 'Excel 2013 Tutorial - How to add Password Protection',
+ 'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
+ 'upload_date': '20130211',
+ 'uploader': 'Hun Kim',
+ 'uploader_id': 'hunkimtutorials',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ src_from = self._html_search_regex(
+ r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
+ webpage, 'external source', default=None, group='url')
+ if src_from:
+ return self.url_result(src_from)
+
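+        # Video metadata is available from the site's oEmbed endpoint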
+ oembed_obj = self._download_json(
+ 'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
+
+ title = oembed_obj['title']
+ thumb_url = oembed_obj['thumbnail_url']
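+        # The media file sits in a video/ folder next to the thumbnail;
+        # fileList.xml in that folder names the actual file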
+ video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
+ file_list_doc = self._download_xml(
+ compat_urlparse.urljoin(video_folder, 'fileList.xml'),
+ video_id, 'Downloading filelist XML')
+ file_name = file_list_doc.find('./video/item/fileName').text
+ video_url = compat_urlparse.urljoin(video_folder, file_name)
+
+ # Some URLs return "No permission or not login" in a webpage despite being
+ # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
+ upload_date = unified_strdate(self._search_regex(
+ r'>published on ([^<]+)<', webpage,
+ 'upload date', default=None))
+ view_count = str_to_int(self._search_regex(
+ r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
+ webpage, 'view count', default=None))
+ description = self._html_search_meta(
+ 'description', webpage, default=None) or clean_html(
+ oembed_obj.get('description'))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumb_url,
+ 'description': description,
+ 'creator': oembed_obj.get('author_name'),
+ 'duration': parse_duration(oembed_obj.get('duration')),
+ 'upload_date': upload_date,
+ 'view_count': view_count,
+ }
+
+
+class CamdemyFolderIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
+ _TESTS = [{
+ # links with trailing slash
+ 'url': 'http://www.camdemy.com/folder/450',
+ 'info_dict': {
+ 'id': '450',
+ 'title': '信號與系統 2012 & 2011 (Signals and Systems)',
+ },
+ 'playlist_mincount': 145
+ }, {
+ # links without trailing slash
+ # and multi-page
+ 'url': 'http://www.camdemy.com/folder/853',
+ 'info_dict': {
+ 'id': '853',
+ 'title': '科學計算 - 使用 Matlab'
+ },
+ 'playlist_mincount': 20
+ }, {
+        # with displayMode parameter; tests that extra query parameters are handled
+ 'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
+ 'info_dict': {
+ 'id': '853',
+ 'title': '科學計算 - 使用 Matlab'
+ },
+ 'playlist_mincount': 20
+ }]
+
+ def _real_extract(self, url):
+ folder_id = self._match_id(url)
+
+        # Add displayMode=list so that all links are displayed on a single page
+ parsed_url = list(compat_urlparse.urlparse(url))
+ query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
+ query.update({'displayMode': 'list'})
+ parsed_url[4] = compat_urllib_parse_urlencode(query)
+ final_url = compat_urlparse.urlunparse(parsed_url)
+
+ page = self._download_webpage(final_url, folder_id)
+ matches = re.findall(r"href='(/media/\d+/?)'", page)
+
+ entries = [self.url_result('http://www.camdemy.com' + media_path)
+ for media_path in matches]
+
+ folder_title = self._html_search_meta('keywords', page)
+
+ return self.playlist_result(entries, folder_id, folder_title)
diff --git a/hypervideo_dl/extractor/cammodels.py b/hypervideo_dl/extractor/cammodels.py
new file mode 100644
index 0000000..1eb81b7
--- /dev/null
+++ b/hypervideo_dl/extractor/cammodels.py
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ url_or_none,
+)
+
+
+class CamModelsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.cammodels.com/cam/AutumnKnight/',
+ 'only_matching': True,
+ 'age_limit': 18
+ }]
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, user_id, headers=self.geo_verification_headers())
+
+ manifest_root = self._html_search_regex(
+ r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
+
+ if not manifest_root:
+ ERRORS = (
+ ("I'm offline, but let's stay connected", 'This user is currently offline'),
+ ('in a private show', 'This user is in a private show'),
+ ('is currently performing LIVE', 'This model is currently performing live'),
+ )
+ for pattern, message in ERRORS:
+ if pattern in webpage:
+ error = message
+ expected = True
+ break
+ else:
+ error = 'Unable to find manifest URL root'
+ expected = False
+ raise ExtractorError(error, expected=expected)
+
+ manifest = self._download_json(
+ '%s%s.json' % (manifest_root, user_id), user_id)
+
+ formats = []
+ for format_id, format_dict in manifest['formats'].items():
+ if not isinstance(format_dict, dict):
+ continue
+ encodings = format_dict.get('encodings')
+ if not isinstance(encodings, list):
+ continue
+ vcodec = format_dict.get('videoCodec')
+ acodec = format_dict.get('audioCodec')
+ for media in encodings:
+ if not isinstance(media, dict):
+ continue
+ media_url = url_or_none(media.get('location'))
+ if not media_url:
+ continue
+
+ format_id_list = [format_id]
+ height = int_or_none(media.get('videoHeight'))
+ if height is not None:
+ format_id_list.append('%dp' % height)
+ f = {
+ 'url': media_url,
+ 'format_id': '-'.join(format_id_list),
+ 'width': int_or_none(media.get('videoWidth')),
+ 'height': height,
+ 'vbr': int_or_none(media.get('videoKbps')),
+ 'abr': int_or_none(media.get('audioKbps')),
+ 'fps': int_or_none(media.get('fps')),
+ 'vcodec': vcodec,
+ 'acodec': acodec,
+ }
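+                # Keep only RTMP and HLS variants; skip anything else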
+ if 'rtmp' in format_id:
+ f['ext'] = 'flv'
+ elif 'hls' in format_id:
+ f.update({
+ 'ext': 'mp4',
+                        # HLS skips fragments, so prefer RTMP
+ 'preference': -1,
+ })
+ else:
+ continue
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': user_id,
+ 'title': self._live_title(user_id),
+ 'is_live': True,
+ 'formats': formats,
+ 'age_limit': 18
+ }
diff --git a/hypervideo_dl/extractor/camtube.py b/hypervideo_dl/extractor/camtube.py
new file mode 100644
index 0000000..b3be3bd
--- /dev/null
+++ b/hypervideo_dl/extractor/camtube.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unified_timestamp,
+)
+
+
+class CamTubeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|api)\.)?camtube\.co/recordings?/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://camtube.co/recording/minafay-030618-1136-chaturbate-female',
+ 'info_dict': {
+ 'id': '42ad3956-dd5b-445a-8313-803ea6079fac',
+ 'display_id': 'minafay-030618-1136-chaturbate-female',
+ 'ext': 'mp4',
+ 'title': 'minafay-030618-1136-chaturbate-female',
+ 'duration': 1274,
+ 'timestamp': 1528018608,
+ 'upload_date': '20180603',
+ 'age_limit': 18
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ _API_BASE = 'https://api.camtube.co'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
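+        # A session cookie from /rpc/session/new is required before querying the recordings API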
+ token = self._download_json(
+ '%s/rpc/session/new' % self._API_BASE, display_id,
+ 'Downloading session token')['token']
+
+ self._set_cookie('api.camtube.co', 'session', token)
+
+ video = self._download_json(
+ '%s/recordings/%s' % (self._API_BASE, display_id), display_id,
+ headers={'Referer': url})
+
+ video_id = video['uuid']
+ timestamp = unified_timestamp(video.get('createdAt'))
+ duration = int_or_none(video.get('duration'))
+ view_count = int_or_none(video.get('viewCount'))
+ like_count = int_or_none(video.get('likeCount'))
+ creator = video.get('stageName')
+
+ formats = [{
+ 'url': '%s/recordings/%s/manifest.m3u8'
+ % (self._API_BASE, video_id),
+ 'format_id': 'hls',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ }]
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': display_id,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'creator': creator,
+ 'formats': formats,
+ 'age_limit': 18
+ }
diff --git a/hypervideo_dl/extractor/camwithher.py b/hypervideo_dl/extractor/camwithher.py
new file mode 100644
index 0000000..bbc5205
--- /dev/null
+++ b/hypervideo_dl/extractor/camwithher.py
@@ -0,0 +1,89 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ unified_strdate,
+)
+
+
+class CamWithHerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*\bviewkey=(?P<id>\w+)'
+
+ _TESTS = [{
+ 'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=',
+ 'info_dict': {
+ 'id': '5644',
+ 'ext': 'flv',
+ 'title': 'Periscope Tease',
+ 'description': 'In the clouds teasing on periscope to my favorite song',
+ 'duration': 240,
+ 'view_count': int,
+ 'comment_count': int,
+ 'uploader': 'MileenaK',
+ 'upload_date': '20160322',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ flv_id = self._html_search_regex(
+ r'<a[^>]+href=["\']/download/\?v=(\d+)', webpage, 'video id')
+
+ # Video URL construction algorithm is reverse-engineered from cwhplayer.swf
+ rtmp_url = 'rtmp://camwithher.tv/clipshare/%s' % (
+ ('mp4:%s.mp4' % flv_id) if int(flv_id) > 2010 else flv_id)
+
+ title = self._html_search_regex(
+ r'<div[^>]+style="float:left"[^>]*>\s*<h2>(.+?)</h2>', webpage, 'title')
+ description = self._html_search_regex(
+ r'>Description:</span>(.+?)</div>', webpage, 'description', default=None)
+
+ runtime = self._search_regex(
+ r'Runtime\s*:\s*(.+?) \|', webpage, 'duration', default=None)
+ if runtime:
+ runtime = re.sub(r'[\s-]', '', runtime)
+ duration = parse_duration(runtime)
+ view_count = int_or_none(self._search_regex(
+ r'Views\s*:\s*(\d+)', webpage, 'view count', default=None))
+ comment_count = int_or_none(self._search_regex(
+ r'Comments\s*:\s*(\d+)', webpage, 'comment count', default=None))
+
+ uploader = self._search_regex(
+ r'Added by\s*:\s*<a[^>]+>([^<]+)</a>', webpage, 'uploader', default=None)
+ upload_date = unified_strdate(self._search_regex(
+ r'Added on\s*:\s*([\d-]+)', webpage, 'upload date', default=None))
+
+ return {
+ 'id': flv_id,
+ 'url': rtmp_url,
+ 'ext': 'flv',
+ 'no_resume': True,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'age_limit': 18
+ }
diff --git a/hypervideo_dl/extractor/canalc2.py b/hypervideo_dl/extractor/canalc2.py
new file mode 100644
index 0000000..407cc80
--- /dev/null
+++ b/hypervideo_dl/extractor/canalc2.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class Canalc2IE(InfoExtractor):
+ IE_NAME = 'canalc2.tv'
+ _VALID_URL = r'https?://(?:(?:www\.)?canalc2\.tv/video/|archives-canalc2\.u-strasbg\.fr/video\.asp\?.*\bidVideo=)(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.canalc2.tv/video/12163',
+ 'md5': '060158428b650f896c542dfbb3d6487f',
+ 'info_dict': {
+ 'id': '12163',
+ 'ext': 'mp4',
+ 'title': 'Terrasses du Numérique',
+ 'duration': 122,
+ },
+ }, {
+ 'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://www.canalc2.tv/video/%s' % video_id, video_id)
+
+ title = self._html_search_regex(
+ r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>',
+ webpage, 'title')
+
+ formats = []
+ for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
+ if video_url.startswith('rtmp://'):
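+                # Split the RTMP URL into base URL, app and play path for rtmpdump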
+ rtmp = re.search(
+ r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url)
+ formats.append({
+ 'url': rtmp.group('url'),
+ 'format_id': 'rtmp',
+ 'ext': 'flv',
+ 'app': rtmp.group('app'),
+ 'play_path': rtmp.group('play_path'),
+ 'page_url': url,
+ })
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http',
+ })
+
+ if formats:
+ info = {
+ 'formats': formats,
+ }
+ else:
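+            # No flash sources found; fall back to HTML5 media entries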
+ info = self._parse_html5_media_entries(url, webpage, url)[0]
+
+ self._sort_formats(info['formats'])
+
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'duration': parse_duration(self._search_regex(
+ r'id=["\']video_duree["\'][^>]*>([^<]+)',
+ webpage, 'duration', fatal=False)),
+ })
+ return info
diff --git a/hypervideo_dl/extractor/canalplus.py b/hypervideo_dl/extractor/canalplus.py
new file mode 100644
index 0000000..51c11cb
--- /dev/null
+++ b/hypervideo_dl/extractor/canalplus.py
@@ -0,0 +1,116 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ # ExtractorError,
+ # HEADRequest,
+ int_or_none,
+ qualities,
+ unified_strdate,
+)
+
+
+class CanalplusIE(InfoExtractor):
+ IE_DESC = 'mycanal.fr and piwiplus.fr'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>mycanal|piwiplus)\.fr/(?:[^/]+/)*(?P<display_id>[^?/]+)(?:\.html\?.*\bvid=|/p/)(?P<id>\d+)'
+ _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
+ _SITE_ID_MAP = {
+ 'mycanal': 'cplus',
+ 'piwiplus': 'teletoon',
+ }
+
+ # Only works for direct mp4 URLs
+ _GEO_COUNTRIES = ['FR']
+
+ _TESTS = [{
+ 'url': 'https://www.mycanal.fr/d17-emissions/lolywood/p/1397061',
+ 'info_dict': {
+ 'id': '1397061',
+ 'display_id': 'lolywood',
+ 'ext': 'mp4',
+ 'title': 'Euro 2016 : Je préfère te prévenir - Lolywood - Episode 34',
+ 'description': 'md5:7d97039d455cb29cdba0d652a0efaa5e',
+ 'upload_date': '20160602',
+ },
+ }, {
+        # geo-restricted, bypassed
+ 'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
+ 'info_dict': {
+ 'id': '1108190',
+ 'display_id': 'pid1405-le-labyrinthe-boing-super-ranger',
+ 'ext': 'mp4',
+ 'title': 'BOING SUPER RANGER - Ep : Le labyrinthe',
+ 'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
+ 'upload_date': '20140724',
+ },
+ 'expected_warnings': ['HTTP Error 403: Forbidden'],
+ }]
+
+ def _real_extract(self, url):
+ site, display_id, video_id = re.match(self._VALID_URL, url).groups()
+
+ site_id = self._SITE_ID_MAP[site]
+
+ info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
+ video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
+
+ if isinstance(video_data, list):
+ video_data = [video for video in video_data if video.get('ID') == video_id][0]
+ media = video_data['MEDIA']
+ infos = video_data['INFOS']
+
+ preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD'])
+
+ # _, fmt_url = next(iter(media['VIDEOS'].items()))
+ # if '/geo' in fmt_url.lower():
+ # response = self._request_webpage(
+ # HEADRequest(fmt_url), video_id,
+ # 'Checking if the video is georestricted')
+ # if '/blocage' in response.geturl():
+ # raise ExtractorError(
+ # 'The video is not available in your country',
+ # expected=True)
+
+ formats = []
+ for format_id, format_url in media['VIDEOS'].items():
+ if not format_url:
+ continue
+ if format_id == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
+ elif format_id == 'HDS':
+ formats.extend(self._extract_f4m_formats(
+ format_url + '?hdcore=2.11.3', video_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.append({
+                    # secret extracted from the 'ya' function in http://player.canalplus.fr/common/js/canalPlayer.js
+ 'url': format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes',
+ 'format_id': format_id,
+ 'preference': preference(format_id),
+ })
+ self._sort_formats(formats)
+
+ thumbnails = [{
+ 'id': image_id,
+ 'url': image_url,
+ } for image_id, image_url in media.get('images', {}).items()]
+
+ titrage = infos['TITRAGE']
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': '%s - %s' % (titrage['TITRE'],
+ titrage['SOUS_TITRE']),
+ 'upload_date': unified_strdate(infos.get('PUBLICATION', {}).get('DATE')),
+ 'thumbnails': thumbnails,
+ 'description': infos.get('DESCRIPTION'),
+ 'duration': int_or_none(infos.get('DURATION')),
+ 'view_count': int_or_none(infos.get('NB_VUES')),
+ 'like_count': int_or_none(infos.get('NB_LIKES')),
+ 'comment_count': int_or_none(infos.get('NB_COMMENTS')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/canvas.py b/hypervideo_dl/extractor/canvas.py
new file mode 100644
index 0000000..eefbab2
--- /dev/null
+++ b/hypervideo_dl/extractor/canvas.py
@@ -0,0 +1,384 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from .gigya import GigyaBaseIE
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ extract_attributes,
+ float_or_none,
+ get_element_by_class,
+ int_or_none,
+ merge_dicts,
+ str_or_none,
+ strip_or_none,
+ url_or_none,
+)
+
+
+class CanvasIE(InfoExtractor):
+ _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+ 'md5': '68993eda72ef62386a15ea2cf3c93107',
+ 'info_dict': {
+ 'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+ 'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+ 'ext': 'mp4',
+ 'title': 'Nachtwacht: De Greystook',
+ 'description': 'Nachtwacht: De Greystook',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1468.04,
+ },
+ 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
+ }, {
+ 'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+ _HLS_ENTRY_PROTOCOLS_MAP = {
+ 'HLS': 'm3u8_native',
+ 'HLS_AES': 'm3u8',
+ }
+ _REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ site_id, video_id = mobj.group('site_id'), mobj.group('id')
+
+ data = None
+ if site_id != 'vrtvideo':
+ # Old API endpoint, serves more formats but may fail for some videos
+ data = self._download_json(
+ 'https://mediazone.vrt.be/api/v1/%s/assets/%s'
+ % (site_id, video_id), video_id, 'Downloading asset JSON',
+ 'Unable to download asset JSON', fatal=False)
+
+ # New API endpoint
+ if not data:
+ headers = self.geo_verification_headers()
+ headers.update({'Content-Type': 'application/json'})
+ token = self._download_json(
+ '%s/tokens' % self._REST_API_BASE, video_id,
+ 'Downloading token', data=b'', headers=headers)['vrtPlayerToken']
+ data = self._download_json(
+ '%s/videos/%s' % (self._REST_API_BASE, video_id),
+ video_id, 'Downloading video JSON', query={
+ 'vrtPlayerToken': token,
+ 'client': '%s@PROD' % site_id,
+ }, expected_status=400)
+ if not data.get('title'):
+ code = data.get('code')
+ if code == 'AUTHENTICATION_REQUIRED':
+ self.raise_login_required()
+ elif code == 'INVALID_LOCATION':
+ self.raise_geo_restricted(countries=['BE'])
+ raise ExtractorError(data.get('message') or code, expected=True)
+
+ title = data['title']
+ description = data.get('description')
+
+ formats = []
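+        # Each target advertises its delivery protocol; pick the matching manifest parser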
+ for target in data['targetUrls']:
+ format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
+ if not format_url or not format_type:
+ continue
+ format_type = format_type.upper()
+ if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
+ m3u8_id=format_type, fatal=False))
+ elif format_type == 'HDS':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_type, fatal=False))
+ elif format_type == 'MPEG_DASH':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id=format_type, fatal=False))
+ elif format_type == 'HSS':
+ formats.extend(self._extract_ism_formats(
+ format_url, video_id, ism_id='mss', fatal=False))
+ else:
+ formats.append({
+ 'format_id': format_type,
+ 'url': format_url,
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ subtitle_urls = data.get('subtitleUrls')
+ if isinstance(subtitle_urls, list):
+ for subtitle in subtitle_urls:
+ subtitle_url = subtitle.get('url')
+ if subtitle_url and subtitle.get('type') == 'CLOSED':
+ subtitles.setdefault('nl', []).append({'url': subtitle_url})
+
+ return {
+ 'id': video_id,
+ 'display_id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'duration': float_or_none(data.get('duration'), 1000),
+ 'thumbnail': data.get('posterImageUrl'),
+ 'subtitles': subtitles,
+ }
+
+
+class CanvasEenIE(InfoExtractor):
+ IE_DESC = 'canvas.be and een.be'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
+ 'md5': 'ed66976748d12350b118455979cca293',
+ 'info_dict': {
+ 'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
+ 'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
+ 'ext': 'flv',
+ 'title': 'De afspraak veilt voor de Warmste Week',
+ 'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 49.02,
+ },
+ 'expected_warnings': ['is not a supported codec'],
+ }, {
+ # with subtitles
+ 'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
+ 'info_dict': {
+ 'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625',
+ 'display_id': 'pieter-0167',
+ 'ext': 'mp4',
+ 'title': 'Pieter 0167',
+ 'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2553.08,
+ 'subtitles': {
+ 'nl': [{
+ 'ext': 'vtt',
+ }],
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Pagina niet gevonden',
+ }, {
+ 'url': 'https://www.een.be/thuis/emma-pakt-thilly-aan',
+ 'info_dict': {
+ 'id': 'md-ast-3a24ced2-64d7-44fb-b4ed-ed1aafbf90b8',
+ 'display_id': 'emma-pakt-thilly-aan',
+ 'ext': 'mp4',
+ 'title': 'Emma pakt Thilly aan',
+ 'description': 'md5:c5c9b572388a99b2690030afa3f3bad7',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 118.24,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['is not a supported codec'],
+ }, {
+ 'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ site_id, display_id = mobj.group('site_id'), mobj.group('id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = strip_or_none(self._search_regex(
+ r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
+ webpage, 'title', default=None) or self._og_search_title(
+ webpage, default=None))
+
+ video_id = self._html_search_regex(
+ r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
+ group='id')
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
+ 'ie_key': CanvasIE.ie_key(),
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ }
+
+
+class VrtNUIE(GigyaBaseIE):
+ IE_DESC = 'VrtNU.be'
+ _VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # Available via old API endpoint
+ 'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1989/postbus-x-s1989a1/',
+ 'info_dict': {
+ 'id': 'pbs-pub-e8713dac-899e-41de-9313-81269f4c04ac$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
+ 'ext': 'mp4',
+ 'title': 'Postbus X - Aflevering 1 (Seizoen 1989)',
+ 'description': 'md5:b704f669eb9262da4c55b33d7c6ed4b7',
+ 'duration': 1457.04,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'series': 'Postbus X',
+ 'season': 'Seizoen 1989',
+ 'season_number': 1989,
+ 'episode': 'De zwarte weduwe',
+ 'episode_number': 1,
+ 'timestamp': 1595822400,
+ 'upload_date': '20200727',
+ },
+ 'skip': 'This video is only available for registered users',
+ 'params': {
+ 'username': '<snip>',
+ 'password': '<snip>',
+ },
+ 'expected_warnings': ['is not a supported codec'],
+ }, {
+ # Only available via new API endpoint
+ 'url': 'https://www.vrt.be/vrtnu/a-z/kamp-waes/1/kamp-waes-s1a5/',
+ 'info_dict': {
+ 'id': 'pbs-pub-0763b56c-64fb-4d38-b95b-af60bf433c71$vid-ad36a73c-4735-4f1f-b2c0-a38e6e6aa7e1',
+ 'ext': 'mp4',
+ 'title': 'Aflevering 5',
+ 'description': 'Wie valt door de mand tijdens een missie?',
+ 'duration': 2967.06,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode_number': 5,
+ },
+ 'skip': 'This video is only available for registered users',
+ 'params': {
+ 'username': '<snip>',
+ 'password': '<snip>',
+ },
+ 'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
+ }]
+ _NETRC_MACHINE = 'vrtnu'
+ _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
+ _CONTEXT_ID = 'R3595707040'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ auth_data = {
+ 'APIKey': self._APIKEY,
+ 'targetEnv': 'jssdk',
+ 'loginID': username,
+ 'password': password,
+ 'authMode': 'cookie',
+ }
+
+ auth_info = self._gigya_login(auth_data)
+
+ # Sometimes authentication fails for no good reason, retry
+ login_attempt = 1
+ while login_attempt <= 3:
+ try:
+ # When requesting a token, no actual token is returned, but the
+ # necessary cookies are set.
+ self._request_webpage(
+ 'https://token.vrt.be',
+ None, note='Requesting a token', errnote='Could not get a token',
+ headers={
+ 'Content-Type': 'application/json',
+ 'Referer': 'https://www.vrt.be/vrtnu/',
+ },
+ data=json.dumps({
+ 'uid': auth_info['UID'],
+ 'uidsig': auth_info['UIDSignature'],
+ 'ts': auth_info['signatureTimestamp'],
+ 'email': auth_info['profile']['email'],
+ }).encode('utf-8'))
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ login_attempt += 1
+ self.report_warning('Authentication failed')
+ self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
+ else:
+                    raise
+ else:
+ break
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ attrs = extract_attributes(self._search_regex(
+ r'(<nui-media[^>]+>)', webpage, 'media element'))
+ video_id = attrs['videoid']
+ publication_id = attrs.get('publicationid')
+ if publication_id:
+ video_id = publication_id + '$' + video_id
+
+ page = (self._parse_json(self._search_regex(
+            r'digitalData\s*=\s*({.+?});', webpage, 'digital data',
+ default='{}'), video_id, fatal=False) or {}).get('page') or {}
+
+ info = self._search_json_ld(webpage, display_id, default={})
+ return merge_dicts(info, {
+ '_type': 'url_transparent',
+ 'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
+ 'ie_key': CanvasIE.ie_key(),
+ 'id': video_id,
+ 'display_id': display_id,
+ 'season_number': int_or_none(page.get('episode_season')),
+ })
+
+
+class DagelijkseKostIE(InfoExtractor):
+ IE_DESC = 'dagelijksekost.een.be'
+ _VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
+ 'md5': '30bfffc323009a3e5f689bef6efa2365',
+ 'info_dict': {
+ 'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
+ 'display_id': 'hachis-parmentier-met-witloof',
+ 'ext': 'mp4',
+ 'title': 'Hachis parmentier met witloof',
+ 'description': 'md5:9960478392d87f63567b5b117688cdc5',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 283.02,
+ },
+ 'expected_warnings': ['is not a supported codec'],
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ title = strip_or_none(get_element_by_class(
+ 'dish-metadata__title', webpage
+ ) or self._html_search_meta(
+ 'twitter:title', webpage))
+
+ description = clean_html(get_element_by_class(
+ 'dish-description', webpage)
+ ) or self._html_search_meta(
+ ('description', 'twitter:description', 'og:description'),
+ webpage)
+
+ video_id = self._html_search_regex(
+ r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
+ group='id')
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id,
+ 'ie_key': CanvasIE.ie_key(),
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ }
diff --git a/hypervideo_dl/extractor/carambatv.py b/hypervideo_dl/extractor/carambatv.py
new file mode 100644
index 0000000..b57b86a
--- /dev/null
+++ b/hypervideo_dl/extractor/carambatv.py
@@ -0,0 +1,108 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ try_get,
+)
+
+from .videomore import VideomoreIE
+
+
+class CarambaTVIE(InfoExtractor):
+ _VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://video1.carambatv.ru/v/191910501',
+ 'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a',
+ 'info_dict': {
+ 'id': '191910501',
+ 'ext': 'mp4',
+ 'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2678.31,
+ },
+ }, {
+ 'url': 'carambatv:191910501',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://video1.carambatv.ru/v/%s/videoinfo.js' % video_id,
+ video_id)
+
+ title = video['title']
+
+ base_url = video.get('video') or 'http://video1.carambatv.ru/v/%s/' % video_id
+
+ formats = [{
+ 'url': base_url + f['fn'],
+ 'height': int_or_none(f.get('height')),
+ 'format_id': '%sp' % f['height'] if f.get('height') else None,
+ } for f in video['qualities'] if f.get('fn')]
+ self._sort_formats(formats)
+
+ thumbnail = video.get('splash')
+ duration = float_or_none(try_get(
+ video, lambda x: x['annotations'][0]['end_time'], compat_str))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class CarambaTVPageIE(InfoExtractor):
+ _VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
+ 'md5': 'a49fb0ec2ad66503eeb46aac237d3c86',
+ 'info_dict': {
+ 'id': '475222',
+ 'ext': 'flv',
+ 'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ # duration reported by videomore is incorrect
+ 'duration': int,
+ },
+ 'add_ie': [VideomoreIE.ie_key()],
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
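+        # Pages usually embed a Videomore player; prefer it when present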
+ videomore_url = VideomoreIE._extract_url(webpage)
+ if not videomore_url:
+ videomore_id = self._search_regex(
+ r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id',
+ default=None)
+ if videomore_id:
+ videomore_url = 'videomore:%s' % videomore_id
+ if videomore_url:
+ title = self._og_search_title(webpage)
+ return {
+ '_type': 'url_transparent',
+ 'url': videomore_url,
+ 'ie_key': VideomoreIE.ie_key(),
+ 'title': title,
+ }
+
+ video_url = self._og_search_property('video:iframe', webpage, default=None)
+
+ if not video_url:
+ video_id = self._search_regex(
+ r'(?:video_id|crmb_vuid)\s*[:=]\s*["\']?(\d+)',
+ webpage, 'video id')
+ video_url = 'carambatv:%s' % video_id
+
+ return self.url_result(video_url, CarambaTVIE.ie_key())
diff --git a/hypervideo_dl/extractor/cartoonnetwork.py b/hypervideo_dl/extractor/cartoonnetwork.py
new file mode 100644
index 0000000..48b3361
--- /dev/null
+++ b/hypervideo_dl/extractor/cartoonnetwork.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .turner import TurnerBaseIE
+from ..utils import int_or_none
+
+
+class CartoonNetworkIE(TurnerBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
+ _TEST = {
+ 'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
+ 'info_dict': {
+ 'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
+ 'ext': 'mp4',
+ 'title': 'How to Draw Upgrade',
+ 'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
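+        # Video metadata lives either on _cnglobal.currentVideo.* or on
+        # video_metadata.content_* globals; search both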
+ def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
+ metadata_re = ''
+ if content_re:
+ metadata_re = r'|video_metadata\.content_' + content_re
+ return self._search_regex(
+ r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re),
+ webpage, name, fatal=fatal)
+
+ media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
+ title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
+
+ info = self._extract_ngtv_info(
+ media_id, {'networkId': 'cartoonnetwork'}, {
+ 'url': url,
+ 'site_name': 'CartoonNetwork',
+ 'auth_required': find_field('authType', 'auth type') != 'unauth',
+ })
+
+ series = find_field(
+ 'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
+ info.update({
+ 'id': media_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': self._html_search_meta('description', webpage),
+ 'series': series,
+ 'episode': title,
+ })
+
+ for field in ('season', 'episode'):
+ field_name = field + 'Number'
+ info[field + '_number'] = int_or_none(find_field(
+ field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
+
+ return info
diff --git a/hypervideo_dl/extractor/cbc.py b/hypervideo_dl/extractor/cbc.py
new file mode 100644
index 0000000..fd5ec60
--- /dev/null
+++ b/hypervideo_dl/extractor/cbc.py
@@ -0,0 +1,497 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import json
+import re
+from xml.sax.saxutils import escape
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_HTTPError,
+)
+from ..utils import (
+ js_to_json,
+ smuggle_url,
+ try_get,
+ xpath_text,
+ xpath_element,
+ xpath_with_ns,
+ find_xpath_attr,
+ orderedSet,
+ parse_duration,
+ parse_iso8601,
+ parse_age_limit,
+ strip_or_none,
+ int_or_none,
+ ExtractorError,
+)
+
+
+class CBCIE(InfoExtractor):
+ IE_NAME = 'cbc.ca'
+ _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # with mediaId
+ 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
+ 'md5': '97e24d09672fc4cf56256d6faa6c25bc',
+ 'info_dict': {
+ 'id': '2682904050',
+ 'ext': 'mp4',
+ 'title': 'Don Cherry – All-Stars',
+ 'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
+ 'timestamp': 1454463000,
+ 'upload_date': '20160203',
+ 'uploader': 'CBCC-NEW',
+ },
+ 'skip': 'Geo-restricted to Canada',
+ }, {
+ # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
+ 'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
+ 'md5': '162adfa070274b144f4fdc3c3b8207db',
+ 'info_dict': {
+ 'id': '2414435309',
+ 'ext': 'mp4',
+ 'title': '22 Minutes Update: What Not To Wear Quebec',
+ 'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
+ 'upload_date': '20131025',
+ 'uploader': 'CBCC-NEW',
+ 'timestamp': 1382717907,
+ },
+ }, {
+ # with clipId, feed only available via tpfeed.cbc.ca
+ 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
+ 'md5': '0274a90b51a9b4971fe005c63f592f12',
+ 'info_dict': {
+ 'id': '2487345465',
+ 'ext': 'mp4',
+ 'title': 'Robin Williams freestyles on 90 Minutes Live',
+ 'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
+ 'upload_date': '19780210',
+ 'uploader': 'CBCC-NEW',
+ 'timestamp': 255977160,
+ },
+ }, {
+ # multiple iframes
+ 'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
+ 'playlist': [{
+ 'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
+ 'info_dict': {
+ 'id': '2680832926',
+ 'ext': 'mp4',
+ 'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
+ 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
+ 'upload_date': '20160201',
+ 'timestamp': 1454342820,
+ 'uploader': 'CBCC-NEW',
+ },
+ }, {
+ 'md5': '415a0e3f586113894174dfb31aa5bb1a',
+ 'info_dict': {
+ 'id': '2658915080',
+ 'ext': 'mp4',
+ 'title': 'Fly like an eagle!',
+ 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
+ 'upload_date': '20150315',
+ 'timestamp': 1426443984,
+ 'uploader': 'CBCC-NEW',
+ },
+ }],
+ 'skip': 'Geo-restricted to Canada',
+ }, {
+ # multiple CBC.APP.Caffeine.initInstance(...)
+ 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
+ 'info_dict': {
+ 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
+ 'id': 'dog-indoor-exercise-winter-1.3928238',
+ 'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
+ },
+ 'playlist_mincount': 6,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
+
+ def _extract_player_init(self, player_init, display_id):
+ player_info = self._parse_json(player_init, display_id, js_to_json)
+ media_id = player_info.get('mediaId')
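+        # Older embeds expose only a clipId; resolve it to a media id via the feeds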
+ if not media_id:
+ clip_id = player_info['clipId']
+ feed = self._download_json(
+ 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
+ clip_id, fatal=False)
+ if feed:
+ media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
+ if not media_id:
+ media_id = self._download_json(
+ 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
+ clip_id)['entries'][0]['id'].split('/')[-1]
+ return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ title = self._og_search_title(webpage, default=None) or self._html_search_meta(
+ 'twitter:title', webpage, 'title', default=None) or self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
+ entries = [
+ self._extract_player_init(player_init, display_id)
+ for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
+ media_ids = []
+ for media_id_re in (
+ r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
+ r'<div[^>]+\bid=["\']player-(\d+)',
+ r'guid["\']\s*:\s*["\'](\d+)'):
+ media_ids.extend(re.findall(media_id_re, webpage))
+ entries.extend([
+ self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+ for media_id in orderedSet(media_ids)])
+ return self.playlist_result(
+ entries, display_id, strip_or_none(title),
+ self._og_search_description(webpage))
+
+
+class CBCPlayerIE(InfoExtractor):
+ IE_NAME = 'cbc.ca:player'
+ _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.cbc.ca/player/play/2683190193',
+ 'md5': '64d25f841ddf4ddb28a235338af32e2c',
+ 'info_dict': {
+ 'id': '2683190193',
+ 'ext': 'mp4',
+ 'title': 'Gerry Runs a Sweat Shop',
+ 'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
+ 'timestamp': 1455071400,
+ 'upload_date': '20160210',
+ 'uploader': 'CBCC-NEW',
+ },
+ 'skip': 'Geo-restricted to Canada',
+ }, {
+ # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
+ 'url': 'http://www.cbc.ca/player/play/2657631896',
+ 'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
+ 'info_dict': {
+ 'id': '2657631896',
+ 'ext': 'mp3',
+ 'title': 'CBC Montreal is organizing its first ever community hackathon!',
+ 'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
+ 'timestamp': 1425704400,
+ 'upload_date': '20150307',
+ 'uploader': 'CBCC-NEW',
+ },
+ }, {
+ 'url': 'http://www.cbc.ca/player/play/2164402062',
+ 'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
+ 'info_dict': {
+ 'id': '2164402062',
+ 'ext': 'mp4',
+ 'title': 'Cancer survivor four times over',
+ 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
+ 'timestamp': 1320410746,
+ 'upload_date': '20111104',
+ 'uploader': 'CBCC-NEW',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'url': smuggle_url(
+ 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, {
+ 'force_smil_url': True
+ }),
+ 'id': video_id,
+ }
+
+
+class CBCWatchBaseIE(InfoExtractor):
+ _device_id = None
+ _device_token = None
+ _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/'
+ _NS_MAP = {
+ 'media': 'http://search.yahoo.com/mrss/',
+ 'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
+ }
+ _GEO_COUNTRIES = ['CA']
+ _LOGIN_URL = 'https://api.loginradius.com/identity/v2/auth/login'
+ _TOKEN_URL = 'https://cloud-api.loginradius.com/sso/jwt/api/token'
+ _API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
+ _NETRC_MACHINE = 'cbcwatch'
+
+ def _signature(self, email, password):
+ data = json.dumps({
+ 'email': email,
+ 'password': password,
+ }).encode()
+ headers = {'content-type': 'application/json'}
+ query = {'apikey': self._API_KEY}
+ resp = self._download_json(self._LOGIN_URL, None, data=data, headers=headers, query=query)
+ access_token = resp['access_token']
+
+        # Exchange the access token for a signed JWT
+ query = {
+ 'access_token': access_token,
+ 'apikey': self._API_KEY,
+ 'jwtapp': 'jwt',
+ }
+ resp = self._download_json(self._TOKEN_URL, None, headers=headers, query=query)
+ return resp['signature']
+
+ def _call_api(self, path, video_id):
+ url = path if path.startswith('http') else self._API_BASE_URL + path
+ for _ in range(2):
+ try:
+ result = self._download_xml(url, video_id, headers={
+ 'X-Clearleap-DeviceId': self._device_id,
+ 'X-Clearleap-DeviceToken': self._device_token,
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                    # Device token has expired; re-acquire it and retry
+ self._register_device()
+ continue
+ raise
+ error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage')
+ if error_message:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message))
+ return result
+
+ def _real_initialize(self):
+ if self._valid_device_token():
+ return
+ device = self._downloader.cache.load(
+ 'cbcwatch', self._cache_device_key()) or {}
+ self._device_id, self._device_token = device.get('id'), device.get('token')
+ if self._valid_device_token():
+ return
+ self._register_device()
+
+ def _valid_device_token(self):
+ return self._device_id and self._device_token
+
+ def _cache_device_key(self):
+ email, _ = self._get_login_info()
+ return '%s_device' % hashlib.sha256(email.encode()).hexdigest() if email else 'device'
+
+ def _register_device(self):
+ result = self._download_xml(
+ self._API_BASE_URL + 'device/register',
+ None, 'Acquiring device token',
+ data=b'<device><type>web</type></device>')
+ self._device_id = xpath_text(result, 'deviceId', fatal=True)
+ email, password = self._get_login_info()
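+        # With credentials, bind the device via device/login; otherwise fall
+        # back to the anonymous device token from the register response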
+ if email and password:
+ signature = self._signature(email, password)
+ data = '<login><token>{0}</token><device><deviceId>{1}</deviceId><type>web</type></device></login>'.format(
+ escape(signature), escape(self._device_id)).encode()
+ url = self._API_BASE_URL + 'device/login'
+ result = self._download_xml(
+ url, None, data=data,
+ headers={'content-type': 'application/xml'})
+ self._device_token = xpath_text(result, 'token', fatal=True)
+ else:
+ self._device_token = xpath_text(result, 'deviceToken', fatal=True)
+ self._downloader.cache.store(
+ 'cbcwatch', self._cache_device_key(), {
+ 'id': self._device_id,
+ 'token': self._device_token,
+ })
+
+ def _parse_rss_feed(self, rss):
+ channel = xpath_element(rss, 'channel', fatal=True)
+
+ def _add_ns(path):
+ return xpath_with_ns(path, self._NS_MAP)
+
+ entries = []
+ for item in channel.findall('item'):
+ guid = xpath_text(item, 'guid', fatal=True)
+ title = xpath_text(item, 'title', fatal=True)
+
+ media_group = xpath_element(item, _add_ns('media:group'), fatal=True)
+ content = xpath_element(media_group, _add_ns('media:content'), fatal=True)
+ content_url = content.attrib['url']
+
+ thumbnails = []
+ for thumbnail in media_group.findall(_add_ns('media:thumbnail')):
+ thumbnail_url = thumbnail.get('url')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'id': thumbnail.get('profile'),
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ timestamp = None
+ release_date = find_xpath_attr(
+ item, _add_ns('media:credit'), 'role', 'releaseDate')
+ if release_date is not None:
+ timestamp = parse_iso8601(release_date.text)
+
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': content_url,
+ 'id': guid,
+ 'title': title,
+ 'description': xpath_text(item, 'description'),
+ 'timestamp': timestamp,
+ 'duration': int_or_none(content.get('duration')),
+ 'age_limit': parse_age_limit(xpath_text(item, _add_ns('media:rating'))),
+ 'episode': xpath_text(item, _add_ns('clearleap:episode')),
+ 'episode_number': int_or_none(xpath_text(item, _add_ns('clearleap:episodeInSeason'))),
+ 'series': xpath_text(item, _add_ns('clearleap:series')),
+ 'season_number': int_or_none(xpath_text(item, _add_ns('clearleap:season'))),
+ 'thumbnails': thumbnails,
+ 'ie_key': 'CBCWatchVideo',
+ })
+
+ return self.playlist_result(
+ entries, xpath_text(channel, 'guid'),
+ xpath_text(channel, 'title'),
+ xpath_text(channel, 'description'))
+
+
+class CBCWatchVideoIE(CBCWatchBaseIE):
+ IE_NAME = 'cbc.ca:watch:video'
+ _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TEST = {
+ # geo-restricted to Canada, bypassable
+ 'url': 'https://api-cbc.cloud.clearleap.com/cloffice/client/web/play/?contentId=3c84472a-1eea-4dee-9267-2655d5055dcf&categoryId=ebc258f5-ee40-4cca-b66b-ba6bd55b7235',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ result = self._call_api(url, video_id)
+
+ m3u8_url = xpath_text(result, 'url', fatal=True)
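+        # Rewrite the single-rendition playlist URL to the master playlist to
+        # pick up all renditions; fall back if that yields fewer than two formats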
+ formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
+ if len(formats) < 2:
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+ for f in formats:
+ format_id = f.get('format_id')
+ if format_id.startswith('AAC'):
+ f['acodec'] = 'aac'
+ elif format_id.startswith('AC3'):
+ f['acodec'] = 'ac-3'
+ self._sort_formats(formats)
+
+ info = {
+ 'id': video_id,
+ 'title': video_id,
+ 'formats': formats,
+ }
+
+ rss = xpath_element(result, 'rss')
+ if rss:
+ info.update(self._parse_rss_feed(rss)['entries'][0])
+ del info['url']
+ del info['_type']
+ del info['ie_key']
+ return info
+
+
+class CBCWatchIE(CBCWatchBaseIE):
+ IE_NAME = 'cbc.ca:watch'
+ _VALID_URL = r'https?://(?:gem|watch)\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
+ _TESTS = [{
+ # geo-restricted to Canada, bypassable
+ 'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
+ 'info_dict': {
+ 'id': '9673749a-5e77-484c-8b62-a1092a6b5168',
+ 'ext': 'mp4',
+ 'title': 'Customer (Dis)Service',
+ 'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
+ 'upload_date': '20160219',
+ 'timestamp': 1455840000,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ 'format': 'bestvideo',
+ },
+ }, {
+ # geo-restricted to Canada, bypassable
+ 'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
+ 'info_dict': {
+ 'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
+ 'title': 'Arthur',
+ 'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ 'url': 'https://gem.cbc.ca/media/this-hour-has-22-minutes/season-26/episode-20/38e815a-0108c6c6a42',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ rss = self._call_api('web/browse/' + video_id, video_id)
+ return self._parse_rss_feed(rss)
+
+
+class CBCOlympicsIE(InfoExtractor):
+ IE_NAME = 'cbc.ca:olympics'
+ _VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._hidden_inputs(webpage)['videoId']
+ video_doc = self._download_xml(
+ 'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id)
+ title = xpath_text(video_doc, 'title', fatal=True)
+ is_live = xpath_text(video_doc, 'kind') == 'Live'
+ if is_live:
+ title = self._live_title(title)
+
+ formats = []
+ for video_source in video_doc.findall('videoSources/videoSource'):
+ uri = xpath_text(video_source, 'uri')
+ if not uri:
+ continue
+ tokenize = self._download_json(
+ 'https://olympics.cbc.ca/api/api-akamai/tokenize',
+ video_id, data=json.dumps({
+ 'VideoSource': uri,
+ }).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'Referer': url,
+ # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js
+ 'Cookie': '_dvp=TK:C0ObxjerU', # AKAMAI CDN cookie
+ }, fatal=False)
+ if not tokenize:
+ continue
+ content_url = tokenize['ContentUrl']
+ video_source_format = video_source.get('format')
+ if video_source_format == 'IIS':
+ formats.extend(self._extract_ism_formats(
+ content_url, video_id, ism_id=video_source_format, fatal=False))
+ else:
+ formats.extend(self._extract_m3u8_formats(
+ content_url, video_id, 'mp4',
+ 'm3u8' if is_live else 'm3u8_native',
+ m3u8_id=video_source_format, fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': xpath_text(video_doc, 'description'),
+ 'thumbnail': xpath_text(video_doc, 'thumbnailUrl'),
+ 'duration': parse_duration(xpath_text(video_doc, 'duration')),
+ 'formats': formats,
+ 'is_live': is_live,
+ }
diff --git a/hypervideo_dl/extractor/cbs.py b/hypervideo_dl/extractor/cbs.py
new file mode 100644
index 0000000..c79e55a
--- /dev/null
+++ b/hypervideo_dl/extractor/cbs.py
@@ -0,0 +1,115 @@
+from __future__ import unicode_literals
+
+from .theplatform import ThePlatformFeedIE
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ find_xpath_attr,
+ xpath_element,
+ xpath_text,
+ update_url_query,
+)
+
+
+class CBSBaseIE(ThePlatformFeedIE):
+ def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
+ subtitles = {}
+ for k, ext in [('sMPTE-TTCCURL', 'tt'), ('ClosedCaptionURL', 'ttml'), ('webVTTCaptionURL', 'vtt')]:
+ cc_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', k)
+ if cc_e is not None:
+ cc_url = cc_e.get('value')
+ if cc_url:
+ subtitles.setdefault(subtitles_lang, []).append({
+ 'ext': ext,
+ 'url': cc_url,
+ })
+ return subtitles
+
+
+class CBSIE(CBSBaseIE):
+ _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs|paramountplus)\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
+ 'info_dict': {
+ 'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
+ 'ext': 'mp4',
+ 'title': 'Connect Chat feat. Garth Brooks',
+ 'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
+ 'duration': 1495,
+ 'timestamp': 1385585425,
+ 'upload_date': '20131127',
+ 'uploader': 'CBSI-NEW',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ '_skip': 'Blocked outside the US',
+ }, {
+ 'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
+ 'only_matching': True,
+ }]
+
+ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
+ items_data = self._download_xml(
+ 'http://can.cbs.com/thunder/player/videoPlayerService.php',
+ content_id, query={'partner': site, 'contentId': content_id})
+ video_data = xpath_element(items_data, './/item')
+ title = xpath_text(video_data, 'videoTitle', 'title', True)
+ tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
+ tp_release_url = 'http://link.theplatform.com/s/' + tp_path
+
+ asset_types = []
+ subtitles = {}
+ formats = []
+ last_e = None
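+        # Query ThePlatform once per distinct asset type; DRM-only types
+        # (FairPlay HLS_FPS, Common Encryption DASH_CENC) are skipped since
+        # they cannot be downloaded anyway.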
+ for item in items_data.findall('.//item'):
+ asset_type = xpath_text(item, 'assetType')
+ if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type:
+ continue
+ asset_types.append(asset_type)
+ query = {
+ 'mbr': 'true',
+ 'assetTypes': asset_type,
+ }
+ if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'):
+ query['formats'] = 'MPEG4,M3U'
+ elif asset_type in ('RTMP', 'WIFI', '3G'):
+ query['formats'] = 'MPEG4,FLV'
+ try:
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ update_url_query(tp_release_url, query), content_id,
+ 'Downloading %s SMIL data' % asset_type)
+ except ExtractorError as e:
+ last_e = e
+ continue
+ formats.extend(tp_formats)
+ subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+ if last_e and not formats:
+ raise last_e
+ self._sort_formats(formats)
+
+ info = self._extract_theplatform_metadata(tp_path, content_id)
+ info.update({
+ 'id': content_id,
+ 'title': title,
+ 'series': xpath_text(video_data, 'seriesTitle'),
+ 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
+ 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
+ 'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
+ 'thumbnail': xpath_text(video_data, 'previewImageURL'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ return info
+
+ def _real_extract(self, url):
+ content_id = self._match_id(url)
+ return self._extract_video_info(content_id)
diff --git a/hypervideo_dl/extractor/cbsinteractive.py b/hypervideo_dl/extractor/cbsinteractive.py
new file mode 100644
index 0000000..6596e98
--- /dev/null
+++ b/hypervideo_dl/extractor/cbsinteractive.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .cbs import CBSIE
+from ..utils import int_or_none
+
+
+class CBSInteractiveIE(CBSIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P<id>[^/?]+)'
+ _TESTS = [{
+ 'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
+ 'info_dict': {
+ 'id': 'R49SYt__yAfmlXR85z4f7gNmCBDcN_00',
+ 'display_id': 'hands-on-with-microsofts-windows-8-1-update',
+ 'ext': 'mp4',
+ 'title': 'Hands-on with Microsoft Windows 8.1 Update',
+ 'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
+ 'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
+ 'uploader': 'Sarah Mitroff',
+ 'duration': 70,
+ 'timestamp': 1396479627,
+ 'upload_date': '20140402',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
+ 'md5': 'f11d27b2fa18597fbf92444d2a9ed386',
+ 'info_dict': {
+ 'id': 'kjOJd_OoVJqbg_ZD8MZCOk8Wekb9QccK',
+ 'display_id': 'whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187',
+ 'ext': 'mp4',
+ 'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
+ 'description': 'md5:d2b9a95a5ffe978ae6fbd4cf944d618f',
+ 'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
+ 'uploader': 'Ashley Esqueda',
+ 'duration': 1482,
+ 'timestamp': 1433289889,
+ 'upload_date': '20150603',
+ },
+ }, {
+ 'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
+ 'info_dict': {
+ 'id': 'k0r4T_ehht4xW_hAOqiVQPuBDPZ8SRjt',
+ 'display_id': 'video-keeping-android-smartphones-and-tablets-secure',
+ 'ext': 'mp4',
+ 'title': 'Video: Keeping Android smartphones and tablets secure',
+ 'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
+ 'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
+ 'uploader': 'Adrian Kingsley-Hughes',
+ 'duration': 731,
+ 'timestamp': 1449129925,
+ 'upload_date': '20151203',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.zdnet.com/video/huawei-matebook-x-video/',
+ 'only_matching': True,
+ }]
+
+ MPX_ACCOUNTS = {
+ 'cnet': 2198311517,
+ 'zdnet': 2387448114,
+ }
+
+ def _real_extract(self, url):
+ site, display_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, display_id)
+
+ data_json = self._html_search_regex(
+ r"data(?:-(?:cnet|zdnet))?-video(?:-(?:uvp(?:js)?|player))?-options='([^']+)'",
+ webpage, 'data json')
+ data = self._parse_json(data_json, display_id)
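+        # Pages embed either a single 'video' object or a list under
+        # 'videos'/'playlist'; in the latter case take the first entry.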
+ vdata = data.get('video') or (data.get('videos') or data.get('playlist'))[0]
+
+ video_id = vdata['mpxRefId']
+
+ title = vdata['title']
+ author = vdata.get('author')
+ if author:
+ uploader = '%s %s' % (author['firstName'], author['lastName'])
+ uploader_id = author.get('id')
+ else:
+ uploader = None
+ uploader_id = None
+
+ info = self._extract_video_info(video_id, site, self.MPX_ACCOUNTS[site])
+ info.update({
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'duration': int_or_none(vdata.get('duration')),
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ })
+ return info
diff --git a/hypervideo_dl/extractor/cbslocal.py b/hypervideo_dl/extractor/cbslocal.py
new file mode 100644
index 0000000..3b7e1a8
--- /dev/null
+++ b/hypervideo_dl/extractor/cbslocal.py
@@ -0,0 +1,119 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .anvato import AnvatoIE
+from .sendtonews import SendtoNewsIE
+from ..compat import compat_urlparse
+from ..utils import (
+ parse_iso8601,
+ unified_timestamp,
+)
+
+
+class CBSLocalIE(AnvatoIE):
+ _VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
+ _VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
+ 'info_dict': {
+ 'id': '3580809',
+ 'ext': 'mp4',
+ 'title': 'A Very Blue Anniversary',
+ 'description': 'CBS2’s Cindy Hsu has more.',
+ 'thumbnail': 're:^https?://.*',
+ 'timestamp': int,
+ 'upload_date': r're:^\d{8}$',
+ 'uploader': 'CBS',
+ 'subtitles': {
+ 'en': 'mincount:5',
+ },
+ 'categories': [
+ 'Stations\\Spoken Word\\WCBSTV',
+ 'Syndication\\AOL',
+ 'Syndication\\MSN',
+ 'Syndication\\NDN',
+ 'Syndication\\Yahoo',
+ 'Content\\News',
+ 'Content\\News\\Local News',
+ ],
+ 'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ mcp_id = self._match_id(url)
+ return self.url_result(
+ 'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
+
+
+class CBSLocalArticleIE(AnvatoIE):
+ _VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
+
+ _TESTS = [{
+ # Anvato backend
+ 'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
+ 'md5': 'f0ee3081e3843f575fccef901199b212',
+ 'info_dict': {
+ 'id': '3401037',
+ 'ext': 'mp4',
+ 'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
+ 'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
+ 'thumbnail': 're:^https?://.*',
+ 'timestamp': 1463440500,
+ 'upload_date': '20160516',
+ 'uploader': 'CBS',
+ 'subtitles': {
+ 'en': 'mincount:5',
+ },
+ 'categories': [
+ 'Stations\\Spoken Word\\KCBSTV',
+ 'Syndication\\MSN',
+ 'Syndication\\NDN',
+ 'Syndication\\AOL',
+ 'Syndication\\Yahoo',
+ 'Syndication\\Tribune',
+ 'Syndication\\Curb.tv',
+ 'Content\\News'
+ ],
+ 'tags': ['CBS 2 News Evening'],
+ },
+ }, {
+ # SendtoNews embed
+ 'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
+ 'info_dict': {
+ 'id': 'GxfCe0Zo7D-175909-5588',
+ },
+ 'playlist_count': 9,
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ sendtonews_url = SendtoNewsIE._extract_url(webpage)
+ if sendtonews_url:
+ return self.url_result(
+ compat_urlparse.urljoin(url, sendtonews_url),
+ ie=SendtoNewsIE.ie_key())
+
+ info_dict = self._extract_anvato_videos(webpage, display_id)
+
+ timestamp = unified_timestamp(self._html_search_regex(
+ r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage,
+ 'released date', default=None)) or parse_iso8601(
+ self._html_search_meta('uploadDate', webpage))
+
+ info_dict.update({
+ 'display_id': display_id,
+ 'timestamp': timestamp,
+ })
+
+ return info_dict
diff --git a/hypervideo_dl/extractor/cbsnews.py b/hypervideo_dl/extractor/cbsnews.py
new file mode 100644
index 0000000..1285ed6
--- /dev/null
+++ b/hypervideo_dl/extractor/cbsnews.py
@@ -0,0 +1,147 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import zlib
+
+from .common import InfoExtractor
+from .cbs import CBSIE
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ parse_duration,
+)
+
+
+class CBSNewsEmbedIE(CBSIE):
+ IE_NAME = 'cbsnews:embed'
+ _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P<id>.+)'
+ _TESTS = [{
+ 'url': 'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITuM4xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ item = self._parse_json(zlib.decompress(compat_b64decode(
+ compat_urllib_parse_unquote(self._match_id(url))),
+ -zlib.MAX_WBITS).decode('utf-8'), None)['video']['items'][0]
+ return self._extract_video_info(item['mpxRefId'], 'cbsnews')
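+
+
+# A minimal standalone sketch of the decode chain used above (hypothetical
+# helper, not referenced by the extractors): the URL fragment is treated as
+# percent-encoded base64 of a raw, headerless zlib-deflate stream that holds
+# the player JSON.
+def _decode_embed_fragment(fragment):
+    import json  # stdlib; the extractors themselves go through self._parse_json
+    payload = zlib.decompress(
+        compat_b64decode(compat_urllib_parse_unquote(fragment)),
+        -zlib.MAX_WBITS).decode('utf-8')
+    return json.loads(payload)['video']['items'][0]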
+
+
+class CBSNewsIE(CBSIE):
+ IE_NAME = 'cbsnews'
+ IE_DESC = 'CBS News'
+ _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\da-z_-]+)'
+
+ _TESTS = [
+ {
+ # 60 minutes
+ 'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/',
+ 'info_dict': {
+ 'id': 'Y_nf_aEg6WwO9OLAq0MpKaPgfnBUxfW4',
+ 'ext': 'flv',
+ 'title': 'Artificial Intelligence, real-life applications',
+ 'description': 'md5:a7aaf27f1b4777244de8b0b442289304',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 317,
+ 'uploader': 'CBSI-NEW',
+ 'timestamp': 1476046464,
+ 'upload_date': '20161009',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
+ 'info_dict': {
+ 'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y',
+ 'ext': 'mp4',
+ 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
+ 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
+ 'upload_date': '20140404',
+ 'timestamp': 1396650660,
+ 'uploader': 'CBSI-NEW',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 205,
+ 'subtitles': {
+ 'en': [{
+ 'ext': 'ttml',
+ }],
+ },
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ # 48 hours
+ 'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
+ 'info_dict': {
+ 'title': 'Cold as Ice',
+ 'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?',
+ },
+ 'playlist_mincount': 7,
+ },
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
+ for embed_url in re.findall(r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage):
+ entries.append(self.url_result(embed_url, CBSNewsEmbedIE.ie_key()))
+ if entries:
+ return self.playlist_result(
+ entries, playlist_title=self._html_search_meta(['og:title', 'twitter:title'], webpage),
+ playlist_description=self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage))
+
+ item = self._parse_json(self._html_search_regex(
+ r'CBSNEWS\.defaultPayload\s*=\s*({.+})',
+ webpage, 'video JSON info'), display_id)['items'][0]
+ return self._extract_video_info(item['mpxRefId'], 'cbsnews')
+
+
+class CBSNewsLiveVideoIE(InfoExtractor):
+ IE_NAME = 'cbsnews:livevideo'
+ IE_DESC = 'CBS News Live Videos'
+ _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[^/?#]+)'
+
+ # Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
+ _TEST = {
+ 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
+ 'info_dict': {
+ 'id': 'clinton-sanders-prepare-to-face-off-in-nh',
+ 'ext': 'mp4',
+ 'title': 'Clinton, Sanders Prepare To Face Off In NH',
+ 'duration': 334,
+ },
+ 'skip': 'Video gone',
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ video_info = self._download_json(
+ 'http://feeds.cbsn.cbsnews.com/rundown/story', display_id, query={
+ 'device': 'desktop',
+ 'dvr_slug': display_id,
+ })
+
+ formats = self._extract_akamai_formats(video_info['url'], display_id)
+ self._sort_formats(formats)
+
+ return {
+ 'id': display_id,
+ 'display_id': display_id,
+ 'title': video_info['headline'],
+ 'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'),
+ 'duration': parse_duration(video_info.get('segmentDur')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/cbssports.py b/hypervideo_dl/extractor/cbssports.py
new file mode 100644
index 0000000..a891c9a
--- /dev/null
+++ b/hypervideo_dl/extractor/cbssports.py
@@ -0,0 +1,113 @@
+from __future__ import unicode_literals
+
+import re
+
+# from .cbs import CBSBaseIE
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ try_get,
+)
+
+
+# class CBSSportsEmbedIE(CBSBaseIE):
+class CBSSportsEmbedIE(InfoExtractor):
+ IE_NAME = 'cbssports:embed'
+ _VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
+ (?:
+ ids%3D(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
+ pcid%3D(?P<pcid>\d+)
+ )'''
+ _TESTS = [{
+ 'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
+ 'only_matching': True,
+ }]
+
+ # def _extract_video_info(self, filter_query, video_id):
+ # return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
+
+ def _real_extract(self, url):
+ uuid, pcid = re.match(self._VALID_URL, url).groups()
+ query = {'id': uuid} if uuid else {'pcid': pcid}
+ video = self._download_json(
+ 'https://www.cbssports.com/api/content/video/',
+ uuid or pcid, query=query)[0]
+ video_id = video['id']
+ title = video['title']
+ metadata = video.get('metaData') or {}
+ # return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id)
+ # return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id)
+
+ formats = self._extract_m3u8_formats(
+ metadata['files'][0]['url'], video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ self._sort_formats(formats)
+
+ image = video.get('image')
+ thumbnails = None
+ if image:
+ image_path = image.get('path')
+ if image_path:
+ thumbnails = [{
+ 'url': image_path,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ 'filesize': int_or_none(image.get('size')),
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': video.get('description'),
+ 'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])),
+ 'duration': int_or_none(metadata.get('duration')),
+ }
+
+
+class CBSSportsBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ iframe_url = self._search_regex(
+ r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"',
+ webpage, 'embed url')
+ return self.url_result(iframe_url, CBSSportsEmbedIE.ie_key())
+
+
+class CBSSportsIE(CBSSportsBaseIE):
+ IE_NAME = 'cbssports'
+ _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/',
+ 'info_dict': {
+ 'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636',
+ 'ext': 'mp4',
+ 'title': 'Cover 3: Stanford Spring Gleaning',
+ 'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.',
+ 'timestamp': 1617218398,
+ 'upload_date': '20210331',
+ 'duration': 502,
+ },
+ }]
+
+
+class TwentyFourSevenSportsIE(CBSSportsBaseIE):
+ IE_NAME = '247sports'
+ _VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/',
+ 'info_dict': {
+ 'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc',
+ 'ext': 'mp4',
+ 'title': '2021 QB Jake Garcia senior highlights through five games',
+ 'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b',
+ 'timestamp': 1607114223,
+ 'upload_date': '20201204',
+ 'duration': 208,
+ },
+ }]
diff --git a/hypervideo_dl/extractor/ccc.py b/hypervideo_dl/extractor/ccc.py
new file mode 100644
index 0000000..36e6dff
--- /dev/null
+++ b/hypervideo_dl/extractor/ccc.py
@@ -0,0 +1,111 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ try_get,
+ url_or_none,
+)
+
+
+class CCCIE(InfoExtractor):
+ IE_NAME = 'media.ccc.de'
+ _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/v/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
+ 'md5': '3a1eda8f3a29515d27f5adb967d7e740',
+ 'info_dict': {
+ 'id': '1839',
+ 'ext': 'mp4',
+ 'title': 'Introduction to Processor Design',
+ 'creator': 'byterazor',
+ 'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20131228',
+ 'timestamp': 1388188800,
+ 'duration': 3710,
+ 'tags': list,
+ }
+ }, {
+ 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ event_id = self._search_regex(r"data-id='(\d+)'", webpage, 'event id')
+ event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id)
+
+ formats = []
+ for recording in event_data.get('recordings', []):
+ recording_url = recording.get('recording_url')
+ if not recording_url:
+ continue
+ language = recording.get('language')
+ folder = recording.get('folder')
+ format_id = None
+ if language:
+ format_id = language
+ if folder:
+ if language:
+ format_id += '-' + folder
+ else:
+ format_id = folder
+            vcodec = 'h264' if 'h264' in (folder or '') else (
+                'none' if folder in ('mp3', 'opus') else None
+            )
+ formats.append({
+ 'format_id': format_id,
+ 'url': recording_url,
+ 'width': int_or_none(recording.get('width')),
+ 'height': int_or_none(recording.get('height')),
+ 'filesize': int_or_none(recording.get('size'), invscale=1024 * 1024),
+ 'language': language,
+ 'vcodec': vcodec,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': event_id,
+ 'display_id': display_id,
+ 'title': event_data['title'],
+ 'creator': try_get(event_data, lambda x: ', '.join(x['persons'])),
+ 'description': event_data.get('description'),
+ 'thumbnail': event_data.get('thumb_url'),
+ 'timestamp': parse_iso8601(event_data.get('date')),
+ 'duration': int_or_none(event_data.get('length')),
+ 'tags': event_data.get('tags'),
+ 'formats': formats,
+ }
+
+
+class CCCPlaylistIE(InfoExtractor):
+ IE_NAME = 'media.ccc.de:lists'
+ _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/c/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://media.ccc.de/c/30c3',
+ 'info_dict': {
+ 'title': '30C3',
+ 'id': '30c3',
+ },
+ 'playlist_count': 135,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url).lower()
+
+ conf = self._download_json(
+ 'https://media.ccc.de/public/conferences/' + playlist_id,
+ playlist_id)
+
+ entries = []
+ for e in conf['events']:
+ event_url = url_or_none(e.get('frontend_link'))
+ if event_url:
+ entries.append(self.url_result(event_url, ie=CCCIE.ie_key()))
+
+ return self.playlist_result(entries, playlist_id, conf.get('title'))
diff --git a/hypervideo_dl/extractor/ccma.py b/hypervideo_dl/extractor/ccma.py
new file mode 100644
index 0000000..e6ae493
--- /dev/null
+++ b/hypervideo_dl/extractor/ccma.py
@@ -0,0 +1,155 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import calendar
+import datetime
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ extract_timezone,
+ int_or_none,
+ parse_duration,
+ parse_resolution,
+ try_get,
+ url_or_none,
+)
+
+
+class CCMAIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
+ 'md5': '7296ca43977c8ea4469e719c609b0871',
+ 'info_dict': {
+ 'id': '5630208',
+ 'ext': 'mp4',
+ 'title': 'L\'espot de La Marató de TV3',
+ 'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
+ 'timestamp': 1478608140,
+ 'upload_date': '20161108',
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
+ 'md5': 'fa3e38f269329a278271276330261425',
+ 'info_dict': {
+ 'id': '943685',
+ 'ext': 'mp3',
+ 'title': 'El Consell de Savis analitza el derbi',
+ 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
+ 'upload_date': '20170512',
+ 'timestamp': 1494622500,
+ 'vcodec': 'none',
+ 'categories': ['Esports'],
+ }
+ }, {
+ 'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
+ 'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
+ 'info_dict': {
+ 'id': '6031387',
+ 'ext': 'mp4',
+ 'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
+ 'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
+ 'timestamp': 1582577700,
+ 'upload_date': '20200224',
+ 'subtitles': 'mincount:4',
+ 'age_limit': 16,
+ 'series': 'Crims',
+ }
+ }]
+
+ def _real_extract(self, url):
+ media_type, media_id = re.match(self._VALID_URL, url).groups()
+
+ media = self._download_json(
+ 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
+ 'media': media_type,
+ 'idint': media_id,
+ })
+
+ formats = []
+ media_url = media['media']['url']
+ if isinstance(media_url, list):
+ for format_ in media_url:
+ format_url = url_or_none(format_.get('file'))
+ if not format_url:
+ continue
+ label = format_.get('label')
+ f = parse_resolution(label)
+ f.update({
+ 'url': format_url,
+ 'format_id': label,
+ })
+ formats.append(f)
+ else:
+ formats.append({
+ 'url': media_url,
+ 'vcodec': 'none' if media_type == 'audio' else None,
+ })
+ self._sort_formats(formats)
+
+ informacio = media['informacio']
+ title = informacio['titol']
+ durada = informacio.get('durada') or {}
+ duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
+ tematica = try_get(informacio, lambda x: x['tematica']['text'])
+
+ timestamp = None
+ data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
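+        # Note the %Y-%d-%m (year-day-month) order in the format string below;
+        # it appears to match how the API serialises 'data_emissio' (cf. the
+        # 20161108 test above).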
+ try:
+ timezone, data_utc = extract_timezone(data_utc)
+ timestamp = calendar.timegm((datetime.datetime.strptime(
+ data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple())
+ except TypeError:
+ pass
+
+ subtitles = {}
+ subtitols = media.get('subtitols') or []
+ if isinstance(subtitols, dict):
+ subtitols = [subtitols]
+ for st in subtitols:
+ sub_url = st.get('url')
+ if sub_url:
+ subtitles.setdefault(
+ st.get('iso') or st.get('text') or 'ca', []).append({
+ 'url': sub_url,
+ })
+
+ thumbnails = []
+ imatges = media.get('imatges', {})
+ if imatges:
+ thumbnail_url = imatges.get('url')
+ if thumbnail_url:
+ thumbnails = [{
+ 'url': thumbnail_url,
+ 'width': int_or_none(imatges.get('amplada')),
+ 'height': int_or_none(imatges.get('alcada')),
+ }]
+
+ age_limit = None
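+        # 'codi_etic' looks like '<prefix>_<rating>', where the rating part is
+        # either 'TP' ("tots els públics", suitable for all ages) or the
+        # minimum recommended age.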
+ codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])
+ if codi_etic:
+ codi_etic_s = codi_etic.split('_')
+ if len(codi_etic_s) == 2:
+ if codi_etic_s[1] == 'TP':
+ age_limit = 0
+ else:
+ age_limit = int_or_none(codi_etic_s[1])
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'description': clean_html(informacio.get('descripcio')),
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ 'age_limit': age_limit,
+ 'alt_title': informacio.get('titol_complet'),
+ 'episode_number': int_or_none(informacio.get('capitol')),
+ 'categories': [tematica] if tematica else None,
+ 'series': informacio.get('programa'),
+ }
diff --git a/hypervideo_dl/extractor/cctv.py b/hypervideo_dl/extractor/cctv.py
new file mode 100644
index 0000000..c76f361
--- /dev/null
+++ b/hypervideo_dl/extractor/cctv.py
@@ -0,0 +1,191 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class CCTVIE(InfoExtractor):
+ IE_DESC = '央视网'
+ _VALID_URL = r'https?://(?:(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)|(?:www\.)?ncpa-classic\.com)/(?:[^/]+/)*?(?P<id>[^/?#&]+?)(?:/index)?(?:\.s?html|[?#&]|$)'
+ _TESTS = [{
+ # fo.addVariable("videoCenterId","id")
+ 'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml',
+ 'md5': 'd61ec00a493e09da810bf406a078f691',
+ 'info_dict': {
+ 'id': '5ecdbeab623f4973b40ff25f18b174e8',
+ 'ext': 'mp4',
+ 'title': '[NBA]二少联手砍下46分 雷霆主场击败鹈鹕(快讯)',
+ 'description': 'md5:7e14a5328dc5eb3d1cd6afbbe0574e95',
+ 'duration': 98,
+ 'uploader': 'songjunjie',
+ 'timestamp': 1455279956,
+ 'upload_date': '20160212',
+ },
+ }, {
+ # var guid = "id"
+ 'url': 'http://tv.cctv.com/2016/02/05/VIDEUS7apq3lKrHG9Dncm03B160205.shtml',
+ 'info_dict': {
+ 'id': 'efc5d49e5b3b4ab2b34f3a502b73d3ae',
+ 'ext': 'mp4',
+ 'title': '[赛车]“车王”舒马赫恢复情况成谜(快讯)',
+ 'description': '2月4日,蒙特泽莫罗透露了关于“车王”舒马赫恢复情况,但情况是否属实遭到了质疑。',
+ 'duration': 37,
+ 'uploader': 'shujun',
+ 'timestamp': 1454677291,
+ 'upload_date': '20160205',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # changePlayer('id')
+ 'url': 'http://english.cntv.cn/special/four_comprehensives/index.shtml',
+ 'info_dict': {
+ 'id': '4bb9bb4db7a6471ba85fdeda5af0381e',
+ 'ext': 'mp4',
+ 'title': 'NHnews008 ANNUAL POLITICAL SEASON',
+ 'description': 'Four Comprehensives',
+ 'duration': 60,
+ 'uploader': 'zhangyunlei',
+ 'timestamp': 1425385521,
+ 'upload_date': '20150303',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # loadvideo('id')
+ 'url': 'http://cctv.cntv.cn/lm/tvseries_russian/yilugesanghua/index.shtml',
+ 'info_dict': {
+ 'id': 'b15f009ff45c43968b9af583fc2e04b2',
+ 'ext': 'mp4',
+ 'title': 'Путь,усыпанный космеями Серия 1',
+ 'description': 'Путь, усыпанный космеями',
+ 'duration': 2645,
+ 'uploader': 'renxue',
+ 'timestamp': 1477479241,
+ 'upload_date': '20161026',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # var initMyAray = 'id'
+ 'url': 'http://www.ncpa-classic.com/2013/05/22/VIDE1369219508996867.shtml',
+ 'info_dict': {
+ 'id': 'a194cfa7f18c426b823d876668325946',
+ 'ext': 'mp4',
+ 'title': '小泽征尔音乐塾 音乐梦想无国界',
+ 'duration': 2173,
+ 'timestamp': 1369248264,
+ 'upload_date': '20130522',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # var ids = ["id"]
+ 'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
+ 'info_dict': {
+ 'id': 'a8606119a4884588a79d81c02abecc16',
+ 'ext': 'mp3',
+ 'title': '来自维也纳的新年贺礼',
+ 'description': 'md5:f13764ae8dd484e84dd4b39d5bcba2a7',
+ 'duration': 1578,
+ 'uploader': 'djy',
+ 'timestamp': 1482942419,
+ 'upload_date': '20161228',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tv.cntv.cn/video/C39296/e0210d949f113ddfb38d31f00a4e5c44',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_id = self._search_regex(
+ [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
+ r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)',
+ r'changePlayer\s*\(\s*["\']([\da-fA-F]+)',
+ r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)',
+ r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)',
+ r'var\s+ids\s*=\s*\[["\']([\da-fA-F]+)'],
+ webpage, 'video id')
+
+ data = self._download_json(
+ 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do', video_id,
+ query={
+ 'pid': video_id,
+ 'url': url,
+ 'idl': 32,
+ 'idlr': 32,
+ 'modifyed': 'false',
+ })
+
+ title = data['title']
+
+ formats = []
+
+ video = data.get('video')
+ if isinstance(video, dict):
+ for quality, chapters_key in enumerate(('lowChapters', 'chapters')):
+ video_url = try_get(
+ video, lambda x: x[chapters_key][0]['url'], compat_str)
+ if video_url:
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http',
+ 'quality': quality,
+ 'preference': -1,
+ })
+
+ hls_url = try_get(data, lambda x: x['hls_url'], compat_str)
+ if hls_url:
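+            # Strip the server-imposed bitrate cap so the variant playlist
+            # lists every available quality.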
+ hls_url = re.sub(r'maxbr=\d+&?', '', hls_url)
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ self._sort_formats(formats)
+
+ uploader = data.get('editer_name')
+ description = self._html_search_meta(
+ 'description', webpage, default=None)
+ timestamp = unified_timestamp(data.get('f_pgmtime'))
+ duration = float_or_none(try_get(video, lambda x: x['totalLength']))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/cda.py b/hypervideo_dl/extractor/cda.py
new file mode 100644
index 0000000..e1b3919
--- /dev/null
+++ b/hypervideo_dl/extractor/cda.py
@@ -0,0 +1,214 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import codecs
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_chr,
+ compat_ord,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ merge_dicts,
+ multipart_encode,
+ parse_duration,
+ random_birthday,
+ urljoin,
+)
+
+
+class CDAIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
+ _BASE_URL = 'http://www.cda.pl/'
+ _TESTS = [{
+ 'url': 'http://www.cda.pl/video/5749950c',
+ 'md5': '6f844bf51b15f31fae165365707ae970',
+ 'info_dict': {
+ 'id': '5749950c',
+ 'ext': 'mp4',
+ 'height': 720,
+ 'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
+ 'description': 'md5:269ccd135d550da90d1662651fcb9772',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'average_rating': float,
+ 'duration': 39,
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'http://www.cda.pl/video/57413289',
+ 'md5': 'a88828770a8310fc00be6c95faf7f4d5',
+ 'info_dict': {
+ 'id': '57413289',
+ 'ext': 'mp4',
+ 'title': 'Lądowanie na lotnisku na Maderze',
+ 'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'crash404',
+ 'view_count': int,
+ 'average_rating': float,
+ 'duration': 137,
+ 'age_limit': 0,
+ }
+ }, {
+ # Age-restricted
+ 'url': 'http://www.cda.pl/video/1273454c4',
+ 'info_dict': {
+ 'id': '1273454c4',
+ 'ext': 'mp4',
+ 'title': 'Bronson (2008) napisy HD 1080p',
+ 'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
+ 'height': 1080,
+ 'uploader': 'boniek61',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 5554,
+ 'age_limit': 18,
+ 'view_count': int,
+ 'average_rating': float,
+ },
+ }, {
+ 'url': 'http://ebd.cda.pl/0x0/5749950c',
+ 'only_matching': True,
+ }]
+
+ def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
+ form_data = random_birthday('rok', 'miesiac', 'dzien')
+ form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
+ data, content_type = multipart_encode(form_data)
+ return self._download_webpage(
+ urljoin(url, '/a/validatebirth'), video_id, *args,
+ data=data, headers={
+ 'Referer': url,
+ 'Content-Type': content_type,
+ }, **kwargs)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ self._set_cookie('cda.pl', 'cda.player', 'html5')
+ webpage = self._download_webpage(
+ self._BASE_URL + '/video/' + video_id, video_id)
+
+ if 'Ten film jest dostępny dla użytkowników premium' in webpage:
+ raise ExtractorError('This video is only available for premium users.', expected=True)
+
+ if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage):
+ self.raise_geo_restricted()
+
+ need_confirm_age = False
+ if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
+ webpage, 'birthday validate form', default=None):
+ webpage = self._download_age_confirm_page(
+ url, video_id, note='Confirming age')
+ need_confirm_age = True
+
+ formats = []
+
+ uploader = self._search_regex(r'''(?x)
+ <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
+ (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
+ <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
+ ''', webpage, 'uploader', default=None, group='uploader')
+ view_count = self._search_regex(
+ r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
+ 'view_count', default=None)
+ average_rating = self._search_regex(
+ (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
+ r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
+ group='rating_value')
+
+ info_dict = {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'uploader': uploader,
+ 'view_count': int_or_none(view_count),
+ 'average_rating': float_or_none(average_rating),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ 'duration': None,
+ 'age_limit': 18 if need_confirm_age else 0,
+ }
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ # Source: https://www.cda.pl/js/player.js?t=1606154898
+ def decrypt_file(a):
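+            # The core mapping below is ROT47: printable ASCII (33-126)
+            # shifted by half the 94-character range, since
+            # 33 + (f + 14) % 94 == 33 + ((f - 33) + 47) % 94. It is applied
+            # after stripping junk markers and percent-decoding.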
+ for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
+ a = a.replace(p, '')
+ a = compat_urllib_parse_unquote(a)
+ b = []
+ for c in a:
+ f = compat_ord(c)
+                b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f <= 126 else compat_chr(f))
+ a = ''.join(b)
+ a = a.replace('.cda.mp4', '')
+ for p in ('.2cda.pl', '.3cda.pl'):
+ a = a.replace(p, '.cda.pl')
+ if '/upstream' in a:
+ a = a.replace('/upstream', '.mp4/upstream')
+ return 'https://' + a
+ return 'https://' + a + '.mp4'
+
+ def extract_format(page, version):
+ json_str = self._html_search_regex(
+ r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
+ '%s player_json' % version, fatal=False, group='player_data')
+ if not json_str:
+ return
+ player_data = self._parse_json(
+ json_str, '%s player_data' % version, fatal=False)
+ if not player_data:
+ return
+ video = player_data.get('video')
+ if not video or 'file' not in video:
+ self.report_warning('Unable to extract %s version information' % version)
+ return
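+            # 'uggc' is 'http' in ROT13; such responses ROT13 the entire URL.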
+ if video['file'].startswith('uggc'):
+ video['file'] = codecs.decode(video['file'], 'rot_13')
+ if video['file'].endswith('adc.mp4'):
+ video['file'] = video['file'].replace('adc.mp4', '.mp4')
+ elif not video['file'].startswith('http'):
+ video['file'] = decrypt_file(video['file'])
+ f = {
+ 'url': video['file'],
+ }
+ m = re.search(
+ r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
+ page)
+ if m:
+ f.update({
+ 'format_id': m.group('format_id'),
+ 'height': int(m.group('height')),
+ })
+ info_dict['formats'].append(f)
+ if not info_dict['duration']:
+ info_dict['duration'] = parse_duration(video.get('duration'))
+
+ extract_format(webpage, 'default')
+
+ for href, resolution in re.findall(
+ r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
+ webpage):
+ if need_confirm_age:
+ handler = self._download_age_confirm_page
+ else:
+ handler = self._download_webpage
+
+ webpage = handler(
+ urljoin(self._BASE_URL, href), video_id,
+ 'Downloading %s version information' % resolution, fatal=False)
+ if not webpage:
+ # Manually report warning because empty page is returned when
+ # invalid version is requested.
+ self.report_warning('Unable to download %s version information' % resolution)
+ continue
+
+ extract_format(webpage, resolution)
+
+ self._sort_formats(formats)
+
+ return merge_dicts(info_dict, info)
diff --git a/hypervideo_dl/extractor/ceskatelevize.py b/hypervideo_dl/extractor/ceskatelevize.py
new file mode 100644
index 0000000..7cb4efb
--- /dev/null
+++ b/hypervideo_dl/extractor/ceskatelevize.py
@@ -0,0 +1,289 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ sanitized_Request,
+ unescapeHTML,
+ update_url_query,
+ urlencode_postdata,
+ USER_AGENTS,
+)
+
+
+class CeskaTelevizeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
+ _TESTS = [{
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
+ 'info_dict': {
+ 'id': '61924494877246241',
+ 'ext': 'mp4',
+ 'title': 'Hyde Park Civilizace: Život v Grónsku',
+ 'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 3350,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
+ 'info_dict': {
+ 'id': '61924494877028507',
+ 'ext': 'mp4',
+ 'title': 'Hyde Park Civilizace: Bonus 01 - En',
+ 'description': 'English Subtittles',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 81.3,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # live stream
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
+ 'info_dict': {
+ 'id': 402,
+ 'ext': 'mp4',
+ 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Georestricted to Czech Republic',
+ }, {
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
+ if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
+ raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+
+ type_ = None
+ episode_id = None
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
+ default='{}'), playlist_id)
+ if playlist:
+ type_ = playlist.get('type')
+ episode_id = playlist.get('id')
+
+ if not type_:
+ type_ = self._html_search_regex(
+ r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
+ webpage, 'type')
+ if not episode_id:
+ episode_id = self._html_search_regex(
+ r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
+ webpage, 'episode_id')
+
+ data = {
+ 'playlist[0][type]': type_,
+ 'playlist[0][id]': episode_id,
+ 'requestUrl': compat_urllib_parse_urlparse(url).path,
+ 'requestSource': 'iVysilani',
+ }
+
+ entries = []
+
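+        # Request the playlist twice, once with the default client and once
+        # pretending to be Safari, and merge the resulting formats: the server
+        # hands out different stream variants depending on the User-Agent.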
+ for user_agent in (None, USER_AGENTS['Safari']):
+ req = sanitized_Request(
+ 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
+ data=urlencode_postdata(data))
+
+ req.add_header('Content-type', 'application/x-www-form-urlencoded')
+ req.add_header('x-addr', '127.0.0.1')
+ req.add_header('X-Requested-With', 'XMLHttpRequest')
+ if user_agent:
+ req.add_header('User-Agent', user_agent)
+ req.add_header('Referer', url)
+
+ playlistpage = self._download_json(req, playlist_id, fatal=False)
+
+ if not playlistpage:
+ continue
+
+ playlist_url = playlistpage['url']
+ if playlist_url == 'error_region':
+ raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+
+ req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
+ req.add_header('Referer', url)
+
+ playlist_title = self._og_search_title(webpage, default=None)
+ playlist_description = self._og_search_description(webpage, default=None)
+
+ playlist = self._download_json(req, playlist_id, fatal=False)
+ if not playlist:
+ continue
+
+ playlist = playlist.get('playlist')
+ if not isinstance(playlist, list):
+ continue
+
+ playlist_len = len(playlist)
+
+ for num, item in enumerate(playlist):
+ is_live = item.get('type') == 'LIVE'
+ formats = []
+ for format_id, stream_url in item.get('streamUrls', {}).items():
+ if 'drmOnly=true' in stream_url:
+ continue
+ if 'playerType=flash' in stream_url:
+ stream_formats = self._extract_m3u8_formats(
+ stream_url, playlist_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls-%s' % format_id, fatal=False)
+ else:
+ stream_formats = self._extract_mpd_formats(
+ stream_url, playlist_id,
+ mpd_id='dash-%s' % format_id, fatal=False)
+ # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
+ if format_id == 'audioDescription':
+ for f in stream_formats:
+ f['source_preference'] = -10
+ formats.extend(stream_formats)
+
+ if user_agent and len(entries) == playlist_len:
+ entries[num]['formats'].extend(formats)
+ continue
+
+ item_id = item.get('id') or item['assetId']
+ title = item['title']
+
+ duration = float_or_none(item.get('duration'))
+ thumbnail = item.get('previewImageUrl')
+
+ subtitles = {}
+ if item.get('type') == 'VOD':
+ subs = item.get('subtitles')
+ if subs:
+ subtitles = self.extract_subtitles(episode_id, subs)
+
+ if playlist_len == 1:
+ final_title = playlist_title or title
+ if is_live:
+ final_title = self._live_title(final_title)
+ else:
+ final_title = '%s (%s)' % (playlist_title, title)
+
+ entries.append({
+ 'id': item_id,
+ 'title': final_title,
+ 'description': playlist_description if playlist_len == 1 else None,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ })
+
+ for e in entries:
+ self._sort_formats(e['formats'])
+
+ return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
+
+ def _get_subtitles(self, episode_id, subs):
+ original_subtitles = self._download_webpage(
+ subs[0]['url'], episode_id, 'Downloading subtitles')
+ srt_subs = self._fix_subtitles(original_subtitles)
+ return {
+ 'cs': [{
+ 'ext': 'srt',
+ 'data': srt_subs,
+ }]
+ }
+
+ @staticmethod
+ def _fix_subtitles(subtitles):
+ """ Convert millisecond-based subtitles to SRT """
+
+ def _msectotimecode(msec):
+ """ Helper utility to convert milliseconds to timecode """
+ components = []
+ for divider in [1000, 60, 60, 100]:
+ components.append(msec % divider)
+ msec //= divider
+ return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)
+
+ def _fix_subtitle(subtitle):
+ for line in subtitle.splitlines():
+ m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line)
+ if m:
+ yield m.group(1)
+ start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
+ yield '{0} --> {1}'.format(start, stop)
+ else:
+ yield line
+
+ return '\r\n'.join(_fix_subtitle(subtitles))
+
+
+class CeskaTelevizePoradyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
+ _TESTS = [{
+ # video with 18+ caution trailer
+ 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
+ 'info_dict': {
+ 'id': '215562210900007-bogotart',
+ 'title': 'Queer: Bogotart',
+ 'description': 'Alternativní průvodce současným queer světem',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '61924494876844842',
+ 'ext': 'mp4',
+ 'title': 'Queer: Bogotart (Varování 18+)',
+ 'duration': 10.2,
+ },
+ }, {
+ 'info_dict': {
+ 'id': '61924494877068022',
+ 'ext': 'mp4',
+ 'title': 'Queer: Bogotart (Queer)',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 1558.3,
+ },
+ }],
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # iframe embed
+ 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ data_url = update_url_query(unescapeHTML(self._search_regex(
+ (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
+ webpage, 'iframe player url', group='url')), query={
+ 'autoStart': 'true',
+ })
+
+ return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
diff --git a/hypervideo_dl/extractor/channel9.py b/hypervideo_dl/extractor/channel9.py
new file mode 100644
index 0000000..09cacf6
--- /dev/null
+++ b/hypervideo_dl/extractor/channel9.py
@@ -0,0 +1,262 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ qualities,
+ unescapeHTML,
+)
+
+
+class Channel9IE(InfoExtractor):
+ IE_DESC = 'Channel 9'
+ IE_NAME = 'channel9'
+ _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
+
+ _TESTS = [{
+ 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
+ 'md5': '32083d4eaf1946db6d454313f44510ca',
+ 'info_dict': {
+ 'id': '6c413323-383a-49dc-88f9-a22800cab024',
+ 'ext': 'wmv',
+ 'title': 'Developer Kick-Off Session: Stuff We Love',
+ 'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731',
+ 'duration': 4576,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'timestamp': 1377717420,
+ 'upload_date': '20130828',
+ 'session_code': 'KOS002',
+ 'session_room': 'Arena 1A',
+ 'session_speakers': 'count:5',
+ },
+ }, {
+ 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
+ 'md5': 'dcf983ee6acd2088e7188c3cf79b46bc',
+ 'info_dict': {
+ 'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024',
+ 'ext': 'wmv',
+ 'title': 'Self-service BI with Power BI - nuclear testing',
+ 'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54',
+ 'duration': 1540,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'timestamp': 1386381991,
+ 'upload_date': '20131207',
+ 'authors': ['Mike Wilmot'],
+ },
+ }, {
+ # low quality mp4 is best
+ 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
+ 'info_dict': {
+ 'id': '33ad69d2-6a4e-4172-83a1-a523013dec76',
+ 'ext': 'mp4',
+ 'title': 'Ranges for the Standard Library',
+ 'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372',
+ 'duration': 5646,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'upload_date': '20150930',
+ 'timestamp': 1443640735,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
+ 'info_dict': {
+ 'id': 'Events/DEVintersection/DEVintersection-2016',
+ 'title': 'DEVintersection 2016 Orlando Sessions',
+ },
+ 'playlist_mincount': 14,
+ }, {
+ 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
+ 'only_matching': True,
+ }]
+
+ _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
+ webpage)
+
+ def _extract_list(self, video_id, rss_url=None):
+ if not rss_url:
+ rss_url = self._RSS_URL % video_id
+ rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
+ entries = [self.url_result(session_url.text, 'Channel9')
+ for session_url in rss.findall('./channel/item/link')]
+ title_text = rss.find('./channel/title').text
+ return self.playlist_result(entries, video_id, title_text)
+
+ def _real_extract(self, url):
+ content_path, rss = re.match(self._VALID_URL, url).groups()
+
+ if rss:
+ return self._extract_list(content_path, url)
+
+ webpage = self._download_webpage(
+ url, content_path, 'Downloading web page')
+
+ episode_data = self._search_regex(
+ r"data-episode='([^']+)'", webpage, 'episode data', default=None)
+ if episode_data:
+ episode_data = self._parse_json(unescapeHTML(
+ episode_data), content_path)
+ content_id = episode_data['contentId']
+ is_session = '/Sessions(' in episode_data['api']
+ content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,'
+ if is_session:
+ content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers'
+ else:
+ content_url += 'Authors,Body&$expand=Authors'
+ content_data = self._download_json(content_url, content_id)
+ title = content_data['Title']
+
+ QUALITIES = (
+ 'mp3',
+ 'wmv', 'mp4',
+ 'wmv-low', 'mp4-low',
+ 'wmv-mid', 'mp4-mid',
+ 'wmv-high', 'mp4-high',
+ )
+
+ quality_key = qualities(QUALITIES)
+
+ def quality(quality_id, format_url):
+ return (len(QUALITIES) if '_Source.' in format_url
+ else quality_key(quality_id))
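+        # Note on the helper above: '_Source.' URLs are presumably the
+        # original uploads; ranking them at len(QUALITIES) puts them above
+        # every named quality (e.g. 9 vs. 8 for 'mp4-high').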
+
+ formats = []
+ urls = set()
+
+ SITE_QUALITIES = {
+ 'MP3': 'mp3',
+ 'MP4': 'mp4',
+ 'Low Quality WMV': 'wmv-low',
+ 'Low Quality MP4': 'mp4-low',
+ 'Mid Quality WMV': 'wmv-mid',
+ 'Mid Quality MP4': 'mp4-mid',
+ 'High Quality WMV': 'wmv-high',
+ 'High Quality MP4': 'mp4-high',
+ }
+
+ formats_select = self._search_regex(
+ r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
+ 'formats select', default=None)
+ if formats_select:
+ for mobj in re.finditer(
+ r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
+ formats_select):
+ format_url = mobj.group('url')
+ if format_url in urls:
+ continue
+ urls.add(format_url)
+ format_id = mobj.group('format')
+ quality_id = SITE_QUALITIES.get(format_id, format_id)
+ formats.append({
+ 'url': format_url,
+ 'format_id': quality_id,
+ 'quality': quality(quality_id, format_url),
+ 'vcodec': 'none' if quality_id == 'mp3' else None,
+ })
+
+ API_QUALITIES = {
+ 'VideoMP4Low': 'mp4-low',
+ 'VideoWMV': 'wmv-mid',
+ 'VideoMP4Medium': 'mp4-mid',
+ 'VideoMP4High': 'mp4-high',
+ 'VideoWMVHQ': 'wmv-hq',
+ }
+
+ for format_id, q in API_QUALITIES.items():
+ q_url = content_data.get(format_id)
+ if not q_url or q_url in urls:
+ continue
+ urls.add(q_url)
+ formats.append({
+ 'url': q_url,
+ 'format_id': q,
+ 'quality': quality(q, q_url),
+ })
+
+ self._sort_formats(formats)
+
+ slides = content_data.get('Slides')
+ zip_file = content_data.get('ZipFile')
+
+ if not formats and not slides and not zip_file:
+ raise ExtractorError(
+                'None of the recording, slides or zip file is available for %s' % content_path)
+
+ subtitles = {}
+ for caption in content_data.get('Captions', []):
+ caption_url = caption.get('Url')
+ if not caption_url:
+ continue
+ subtitles.setdefault(caption.get('Language', 'en'), []).append({
+ 'url': caption_url,
+ 'ext': 'vtt',
+ })
+
+ common = {
+ 'id': content_id,
+ 'title': title,
+ 'description': clean_html(content_data.get('Description') or content_data.get('Body')),
+ 'thumbnail': content_data.get('VideoPlayerPreviewImage'),
+ 'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
+ 'timestamp': parse_iso8601(content_data.get('PublishedDate')),
+ 'avg_rating': int_or_none(content_data.get('Rating')),
+ 'rating_count': int_or_none(content_data.get('RatingCount')),
+ 'view_count': int_or_none(content_data.get('Views')),
+ 'comment_count': int_or_none(content_data.get('CommentCount')),
+ 'subtitles': subtitles,
+ }
+ if is_session:
+ speakers = []
+ for s in content_data.get('Speakers', []):
+ speaker_name = s.get('FullName')
+ if not speaker_name:
+ continue
+ speakers.append(speaker_name)
+
+ common.update({
+ 'session_code': content_data.get('Code'),
+ 'session_room': content_data.get('Room'),
+ 'session_speakers': speakers,
+ })
+ else:
+ authors = []
+ for a in content_data.get('Authors', []):
+ author_name = a.get('DisplayName')
+ if not author_name:
+ continue
+ authors.append(author_name)
+ common['authors'] = authors
+
+ contents = []
+
+ if slides:
+ d = common.copy()
+ d.update({'title': title + '-Slides', 'url': slides})
+ contents.append(d)
+
+ if zip_file:
+ d = common.copy()
+ d.update({'title': title + '-Zip', 'url': zip_file})
+ contents.append(d)
+
+ if formats:
+ d = common.copy()
+ d.update({'title': title, 'formats': formats})
+ contents.append(d)
+ return self.playlist_result(contents)
+ else:
+ return self._extract_list(content_path)
diff --git a/hypervideo_dl/extractor/charlierose.py b/hypervideo_dl/extractor/charlierose.py
new file mode 100644
index 0000000..42c9af2
--- /dev/null
+++ b/hypervideo_dl/extractor/charlierose.py
@@ -0,0 +1,54 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import remove_end
+
+
+class CharlieRoseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://charlierose.com/videos/27996',
+ 'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
+ 'info_dict': {
+ 'id': '27996',
+ 'ext': 'mp4',
+ 'title': 'Remembering Zaha Hadid',
+ 'thumbnail': r're:^https?://.*\.jpg\?\d+',
+ 'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.',
+ 'subtitles': {
+ 'en': [{
+ 'ext': 'vtt',
+ }],
+ },
+ },
+ }, {
+ 'url': 'https://charlierose.com/videos/27996',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://charlierose.com/episodes/30887?autoplay=true',
+ 'only_matching': True,
+ }]
+
+ _PLAYER_BASE = 'https://charlierose.com/video/player/%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(self._PLAYER_BASE % video_id, video_id)
+
+ title = remove_end(self._og_search_title(webpage), ' - Charlie Rose')
+
+ info_dict = self._parse_html5_media_entries(
+ self._PLAYER_BASE % video_id, webpage, video_id,
+ m3u8_entry_protocol='m3u8_native')[0]
+
+ self._sort_formats(info_dict['formats'])
+ self._remove_duplicate_formats(info_dict['formats'])
+
+ info_dict.update({
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage),
+ })
+
+ return info_dict
diff --git a/hypervideo_dl/extractor/chaturbate.py b/hypervideo_dl/extractor/chaturbate.py
new file mode 100644
index 0000000..a459dcb
--- /dev/null
+++ b/hypervideo_dl/extractor/chaturbate.py
@@ -0,0 +1,109 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ lowercase_escape,
+ url_or_none,
+)
+
+
+class ChaturbateIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.chaturbate.com/siswet19/',
+ 'info_dict': {
+ 'id': 'siswet19',
+ 'ext': 'mp4',
+ 'title': 're:^siswet19 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'age_limit': 18,
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Room is offline',
+ }, {
+ 'url': 'https://chaturbate.com/fullvideo/?b=caylin',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://en.chaturbate.com/siswet19/',
+ 'only_matching': True,
+ }]
+
+ _ROOM_OFFLINE = 'Room is currently offline'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://chaturbate.com/%s/' % video_id, video_id,
+ headers=self.geo_verification_headers())
+
+ found_m3u8_urls = []
+
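+        # initialRoomDossier is embedded as a JS string literal full of
+        # \uXXXX escapes; lowercase_escape() decodes them before JSON parsing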
+ data = self._parse_json(
+ self._search_regex(
+ r'initialRoomDossier\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'data', default='{}', group='value'),
+ video_id, transform_source=lowercase_escape, fatal=False)
+ if data:
+ m3u8_url = url_or_none(data.get('hls_source'))
+ if m3u8_url:
+ found_m3u8_urls.append(m3u8_url)
+
+ if not found_m3u8_urls:
+ for m in re.finditer(
+ r'(\\u002[27])(?P<url>http.+?\.m3u8.*?)\1', webpage):
+ found_m3u8_urls.append(lowercase_escape(m.group('url')))
+
+ if not found_m3u8_urls:
+ for m in re.finditer(
+ r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
+ found_m3u8_urls.append(m.group('url'))
+
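+        # each discovered playlist may come in a low-latency '_fast' variant;
+        # probe both the URL as found and its '_fast'-stripped form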
+ m3u8_urls = []
+ for found_m3u8_url in found_m3u8_urls:
+ m3u8_fast_url, m3u8_no_fast_url = found_m3u8_url, found_m3u8_url.replace('_fast', '')
+ for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
+ if m3u8_url not in m3u8_urls:
+ m3u8_urls.append(m3u8_url)
+
+ if not m3u8_urls:
+ error = self._search_regex(
+ [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
+ r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
+ webpage, 'error', group='error', default=None)
+ if not error:
+ if any(p in webpage for p in (
+ self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
+ error = self._ROOM_OFFLINE
+ if error:
+ raise ExtractorError(error, expected=True)
+ raise ExtractorError('Unable to find stream URL')
+
+ formats = []
+ for m3u8_url in m3u8_urls:
+ for known_id in ('fast', 'slow'):
+ if '_%s' % known_id in m3u8_url:
+ m3u8_id = known_id
+ break
+ else:
+ m3u8_id = None
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4',
+ # ffmpeg skips segments for fast m3u8
+ preference=-10 if m3u8_id == 'fast' else None,
+ m3u8_id=m3u8_id, fatal=False, live=True))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(video_id),
+ 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id,
+ 'age_limit': self._rta_search(webpage),
+ 'is_live': True,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/chilloutzone.py b/hypervideo_dl/extractor/chilloutzone.py
new file mode 100644
index 0000000..5aac212
--- /dev/null
+++ b/hypervideo_dl/extractor/chilloutzone.py
@@ -0,0 +1,96 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..compat import compat_b64decode
+from ..utils import (
+ clean_html,
+    ExtractorError,
+)
+
+
+class ChilloutzoneIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w-]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
+ 'md5': 'a76f3457e813ea0037e5244f509e66d1',
+ 'info_dict': {
+ 'id': 'enemene-meck-alle-katzen-weg',
+ 'ext': 'mp4',
+ 'title': 'Enemene Meck - Alle Katzen weg',
+ 'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
+ },
+ }, {
+ 'note': 'Video hosted at YouTube',
+ 'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
+ 'info_dict': {
+ 'id': '1YVQaAgHyRU',
+ 'ext': 'mp4',
+ 'title': '16 Photos Taken 1 Second Before Disaster',
+ 'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
+ 'uploader': 'BuzzFeedVideo',
+ 'uploader_id': 'BuzzFeedVideo',
+ 'upload_date': '20131105',
+ },
+ }, {
+ 'note': 'Video hosted at Vimeo',
+ 'url': 'http://www.chilloutzone.net/video/icon-blending.html',
+ 'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
+ 'info_dict': {
+ 'id': '85523671',
+ 'ext': 'mp4',
+ 'title': 'The Sunday Times - Icons',
+ 'description': 're:(?s)^Watch the making of - makingoficons.com.{300,}',
+ 'uploader': 'Us',
+ 'uploader_id': 'usfilms',
+ 'upload_date': '20140131'
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ base64_video_info = self._html_search_regex(
+ r'var cozVidData = "(.+?)";', webpage, 'video data')
+ decoded_video_info = compat_b64decode(base64_video_info).decode('utf-8')
+ video_info_dict = json.loads(decoded_video_info)
+
+ # get video information from dict
+ video_url = video_info_dict['mediaUrl']
+ description = clean_html(video_info_dict.get('description'))
+ title = video_info_dict['title']
+ native_platform = video_info_dict['nativePlatform']
+ native_video_id = video_info_dict['nativeVideoId']
+ source_priority = video_info_dict['sourcePriority']
+
+        # If nativePlatform is None, a fallback mechanism is used (i.e. a YouTube embed)
+ if native_platform is None:
+ youtube_url = YoutubeIE._extract_url(webpage)
+ if youtube_url:
+ return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
+
+        # Not a fallback: decide whether to use the native source
+        # (e.g. YouTube or Vimeo) or the site's own CDN
+ if source_priority == 'native':
+ if native_platform == 'youtube':
+ return self.url_result(native_video_id, ie='Youtube')
+ if native_platform == 'vimeo':
+ return self.url_result(
+ 'http://vimeo.com/' + native_video_id, ie='Vimeo')
+
+ if not video_url:
+ raise ExtractorError('No video found')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': title,
+ 'description': description,
+ }
diff --git a/hypervideo_dl/extractor/chirbit.py b/hypervideo_dl/extractor/chirbit.py
new file mode 100644
index 0000000..8d75cdf
--- /dev/null
+++ b/hypervideo_dl/extractor/chirbit.py
@@ -0,0 +1,91 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_b64decode
+from ..utils import parse_duration
+
+
+class ChirbitIE(InfoExtractor):
+ IE_NAME = 'chirbit'
+ _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'http://chirb.it/be2abG',
+ 'info_dict': {
+ 'id': 'be2abG',
+ 'ext': 'mp3',
+ 'title': 'md5:f542ea253f5255240be4da375c6a5d7e',
+ 'description': 'md5:f24a4e22a71763e32da5fed59e47c770',
+ 'duration': 306,
+ 'uploader': 'Gerryaudio',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://chirb.it/wp/MN58c2',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://chirb.it/%s' % audio_id, audio_id)
+
+ data_fd = self._search_regex(
+ r'data-fd=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'data fd', group='url')
+
+ # Reverse engineered from https://chirb.it/js/chirbit.player.js (look
+ # for soundURL)
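+        # (the page stores the base64 of the audio URL written backwards,
+        # so reversing the string restores valid base64)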
+ audio_url = compat_b64decode(data_fd[::-1]).decode('utf-8')
+
+ title = self._search_regex(
+ r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title')
+ description = self._search_regex(
+ r'<h3>Description</h3>\s*<pre[^>]*>([^<]+)</pre>',
+ webpage, 'description', default=None)
+ duration = parse_duration(self._search_regex(
+ r'class=["\']c-length["\'][^>]*>([^<]+)',
+ webpage, 'duration', fatal=False))
+ uploader = self._search_regex(
+ r'id=["\']chirbit-username["\'][^>]*>([^<]+)',
+ webpage, 'uploader', fatal=False)
+
+ return {
+ 'id': audio_id,
+ 'url': audio_url,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'uploader': uploader,
+ }
+
+
+class ChirbitProfileIE(InfoExtractor):
+ IE_NAME = 'chirbit:profile'
+ _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://chirbit.com/ScarletBeauty',
+ 'info_dict': {
+ 'id': 'ScarletBeauty',
+ },
+ 'playlist_mincount': 3,
+ }
+
+ def _real_extract(self, url):
+ profile_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, profile_id)
+
+ entries = [
+ self.url_result(self._proto_relative_url('//chirb.it/' + video_id))
+ for _, video_id in re.findall(r'<input[^>]+id=([\'"])copy-btn-(?P<id>[0-9a-zA-Z]+)\1', webpage)]
+
+ return self.playlist_result(entries, profile_id)
diff --git a/hypervideo_dl/extractor/cinchcast.py b/hypervideo_dl/extractor/cinchcast.py
new file mode 100644
index 0000000..b861d54
--- /dev/null
+++ b/hypervideo_dl/extractor/cinchcast.py
@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ unified_strdate,
+ xpath_text,
+)
+
+
+class CinchcastIE(InfoExtractor):
+ _VALID_URL = r'https?://player\.cinchcast\.com/.*?(?:assetId|show_id)=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://player.cinchcast.com/?show_id=5258197&platformId=1&assetType=single',
+ 'info_dict': {
+ 'id': '5258197',
+ 'ext': 'mp3',
+ 'title': 'Train Your Brain to Up Your Game with Coach Mandy',
+ 'upload_date': '20130816',
+ },
+ }, {
+        # The actual test runs via the generic extractor; look for undergroundwellness
+ 'url': 'http://player.cinchcast.com/?platformId=1&#038;assetType=single&#038;assetId=7141703',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ doc = self._download_xml(
+ 'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id,
+ video_id)
+
+ item = doc.find('.//item')
+ title = xpath_text(item, './title', fatal=True)
+ date_str = xpath_text(
+ item, './{http://developer.longtailvideo.com/trac/}date')
+ upload_date = unified_strdate(date_str, day_first=False)
+ # duration is present but wrong
+ formats = [{
+ 'format_id': 'main',
+ 'url': item.find('./{http://search.yahoo.com/mrss/}content').attrib['url'],
+ }]
+ backup_url = xpath_text(
+ item, './{http://developer.longtailvideo.com/trac/}backupContent')
+ if backup_url:
+ formats.append({
+ 'preference': 2, # seems to be more reliable
+ 'format_id': 'backup',
+ 'url': backup_url,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/cinemax.py b/hypervideo_dl/extractor/cinemax.py
new file mode 100644
index 0000000..7f89d33
--- /dev/null
+++ b/hypervideo_dl/extractor/cinemax.py
@@ -0,0 +1,29 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .hbo import HBOBaseIE
+
+
+class CinemaxIE(HBOBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?cinemax\.com/(?P<path>[^/]+/video/[0-9a-z-]+-(?P<id>\d+))'
+ _TESTS = [{
+ 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903',
+ 'md5': '82e0734bba8aa7ef526c9dd00cf35a05',
+ 'info_dict': {
+ 'id': '20126903',
+ 'ext': 'mp4',
+ 'title': 'S1 Ep 1: Recap',
+ },
+ 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
+ }, {
+ 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903.embed',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ path, video_id = re.match(self._VALID_URL, url).groups()
+ info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id)
+ info['id'] = video_id
+ return info
diff --git a/hypervideo_dl/extractor/ciscolive.py b/hypervideo_dl/extractor/ciscolive.py
new file mode 100644
index 0000000..da404e4
--- /dev/null
+++ b/hypervideo_dl/extractor/ciscolive.py
@@ -0,0 +1,151 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ clean_html,
+ float_or_none,
+ int_or_none,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class CiscoLiveBaseIE(InfoExtractor):
+ # These appear to be constant across all Cisco Live presentations
+ # and are not tied to any user session or event
+ RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s'
+ RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz'
+ RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye'
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s'
+
+ HEADERS = {
+ 'Origin': 'https://ciscolive.cisco.com',
+ 'rfApiProfileId': RAINFOCUS_API_PROFILE_ID,
+ 'rfWidgetId': RAINFOCUS_WIDGET_ID,
+ }
+
+ def _call_api(self, ep, rf_id, query, referrer, note=None):
+ headers = self.HEADERS.copy()
+ headers['Referer'] = referrer
+ return self._download_json(
+ self.RAINFOCUS_API_URL % ep, rf_id, note=note,
+ data=urlencode_postdata(query), headers=headers)
+
+ def _parse_rf_item(self, rf_item):
+ event_name = rf_item.get('eventName')
+ title = rf_item['title']
+ description = clean_html(rf_item.get('abstract'))
+ presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName'])
+ bc_id = rf_item['videos'][0]['url']
+ bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id
+ duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length']))
+ location = try_get(rf_item, lambda x: x['times'][0]['room'])
+
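+        # 'length' appears to be given in minutes; normalize to seconds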
+ if duration:
+ duration = duration * 60
+
+ return {
+ '_type': 'url_transparent',
+ 'url': bc_url,
+ 'ie_key': 'BrightcoveNew',
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'creator': presenter_name,
+ 'location': location,
+ 'series': event_name,
+ }
+
+
+class CiscoLiveSessionIE(CiscoLiveBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/[^#]*#/session/(?P<id>[^/?&]+)'
+ _TESTS = [{
+ 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
+ 'md5': 'c98acf395ed9c9f766941c70f5352e22',
+ 'info_dict': {
+ 'id': '5803694304001',
+ 'ext': 'mp4',
+ 'title': '13 Smart Automations to Monitor Your Cisco IOS Network',
+ 'description': 'md5:ec4a436019e09a918dec17714803f7cc',
+ 'timestamp': 1530305395,
+ 'upload_date': '20180629',
+ 'uploader_id': '5647924234001',
+ 'location': '16B Mezz.',
+ },
+ }, {
+ 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.event=ciscoliveemea2019#/session/15361595531500013WOU',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ciscolive.com/global/on-demand-library.html?#/session/1490051371645001kNaS',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ rf_id = self._match_id(url)
+ rf_result = self._call_api('session', rf_id, {'id': rf_id}, url)
+ return self._parse_rf_item(rf_result['items'][0])
+
+
+class CiscoLiveSearchIE(CiscoLiveBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/(?:global/)?on-demand-library(?:\.html|/)'
+ _TESTS = [{
+ 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
+ 'info_dict': {
+ 'title': 'Search query',
+ },
+ 'playlist_count': 5,
+ }, {
+ 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.technicallevel=scpsSkillLevel_aintroductory&search.event=ciscoliveemea2019&search.technology=scpsTechnology_dataCenter&search.focus=scpsSessionFocus_bestPractices#/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url)
+
+ @staticmethod
+ def _check_bc_id_exists(rf_item):
+ return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None
+
+ def _entries(self, query, url):
+ query['size'] = 50
+ query['from'] = 0
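+        # the RainFocus search endpoint pages with offset/limit semantics
+        # ('from'/'size'); iterate until a page has no items or
+        # 'from' + 'size' passes the reported total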
+ for page_num in itertools.count(1):
+ results = self._call_api(
+ 'search', None, query, url,
+ 'Downloading search JSON page %d' % page_num)
+ sl = try_get(results, lambda x: x['sectionList'][0], dict)
+ if sl:
+ results = sl
+ items = results.get('items')
+ if not items or not isinstance(items, list):
+ break
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ if not self._check_bc_id_exists(item):
+ continue
+ yield self._parse_rf_item(item)
+ size = int_or_none(results.get('size'))
+ if size is not None:
+ query['size'] = size
+ total = int_or_none(results.get('total'))
+ if total is not None and query['from'] + query['size'] > total:
+ break
+ query['from'] += query['size']
+
+ def _real_extract(self, url):
+ query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ query['type'] = 'session'
+ return self.playlist_result(
+ self._entries(query, url), playlist_title='Search query')
diff --git a/hypervideo_dl/extractor/cjsw.py b/hypervideo_dl/extractor/cjsw.py
new file mode 100644
index 0000000..505bdbe
--- /dev/null
+++ b/hypervideo_dl/extractor/cjsw.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ unescapeHTML,
+)
+
+
+class CJSWIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/(?P<program>[^/]+)/episode/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620',
+ 'md5': 'cee14d40f1e9433632c56e3d14977120',
+ 'info_dict': {
+ 'id': '91d9f016-a2e7-46c5-8dcb-7cbcd7437c41',
+ 'ext': 'mp3',
+ 'title': 'Freshly Squeezed – Episode June 20, 2017',
+ 'description': 'md5:c967d63366c3898a80d0c7b0ff337202',
+ 'series': 'Freshly Squeezed',
+ 'episode_id': '20170620',
+ },
+ }, {
+ # no description
+ 'url': 'http://cjsw.com/program/road-pops/episode/20170707/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ program, episode_id = mobj.group('program', 'id')
+ audio_id = '%s/%s' % (program, episode_id)
+
+ webpage = self._download_webpage(url, episode_id)
+
+ title = unescapeHTML(self._search_regex(
+ (r'<h1[^>]+class=["\']episode-header__title["\'][^>]*>(?P<title>[^<]+)',
+ r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1'),
+ webpage, 'title', group='title'))
+
+ audio_url = self._search_regex(
+ r'<button[^>]+data-audio-src=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'audio url', group='url')
+
+ audio_id = self._search_regex(
+ r'/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.mp3',
+ audio_url, 'audio id', default=audio_id)
+
+ formats = [{
+ 'url': audio_url,
+ 'ext': determine_ext(audio_url, 'mp3'),
+ 'vcodec': 'none',
+ }]
+
+ description = self._html_search_regex(
+ r'<p>(?P<description>.+?)</p>', webpage, 'description',
+ default=None)
+ series = self._search_regex(
+ r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage,
+ 'series', default=program, group='name')
+
+ return {
+ 'id': audio_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'series': series,
+ 'episode_id': episode_id,
+ }
diff --git a/hypervideo_dl/extractor/cliphunter.py b/hypervideo_dl/extractor/cliphunter.py
new file mode 100644
index 0000000..f2ca7a3
--- /dev/null
+++ b/hypervideo_dl/extractor/cliphunter.py
@@ -0,0 +1,79 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ url_or_none,
+)
+
+
+class CliphunterIE(InfoExtractor):
+ IE_NAME = 'cliphunter'
+
+ _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/
+ (?P<id>[0-9]+)/
+ (?P<seo>.+?)(?:$|[#\?])
+ '''
+ _TESTS = [{
+ 'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
+ 'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
+ 'info_dict': {
+ 'id': '1012420',
+ 'ext': 'flv',
+ 'title': 'Fun Jynx Maze solo',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'age_limit': 18,
+ },
+ 'skip': 'Video gone',
+ }, {
+ 'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz',
+ 'md5': '55a723c67bfc6da6b0cfa00d55da8a27',
+ 'info_dict': {
+ 'id': '2019449',
+ 'ext': 'mp4',
+ 'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'age_limit': 18,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_title = self._search_regex(
+ r'mediaTitle = "([^"]+)"', webpage, 'title')
+
+ gexo_files = self._parse_json(
+ self._search_regex(
+ r'var\s+gexoFiles\s*=\s*({.+?});', webpage, 'gexo files'),
+ video_id)
+
+ formats = []
+ for format_id, f in gexo_files.items():
+ video_url = url_or_none(f.get('url'))
+ if not video_url:
+ continue
+ fmt = f.get('fmt')
+ height = f.get('h')
+ format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'width': int_or_none(f.get('w')),
+ 'height': int_or_none(height),
+ 'tbr': int_or_none(f.get('br')),
+ })
+ self._sort_formats(formats)
+
+ thumbnail = self._search_regex(
+ r"var\s+mov_thumb\s*=\s*'([^']+)';",
+ webpage, 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': video_title,
+ 'formats': formats,
+ 'age_limit': self._rta_search(webpage),
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/clippit.py b/hypervideo_dl/extractor/clippit.py
new file mode 100644
index 0000000..a1a7a77
--- /dev/null
+++ b/hypervideo_dl/extractor/clippit.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    qualities,
+)
+
+
+class ClippitIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P<id>[a-z]+)'
+ _TEST = {
+ 'url': 'https://www.clippituser.tv/c/evmgm',
+ 'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09',
+ 'info_dict': {
+ 'id': 'evmgm',
+ 'ext': 'mp4',
+ 'title': 'Bye bye Brutus. #BattleBots - Clippit',
+ 'uploader': 'lizllove',
+ 'uploader_url': 'https://www.clippituser.tv/p/lizllove',
+ 'timestamp': 1472183818,
+ 'upload_date': '20160826',
+ 'description': 'BattleBots | ABC',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'<title.*>(.+?)</title>', webpage, 'title')
+
+ FORMATS = ('sd', 'hd')
+ quality = qualities(FORMATS)
+ formats = []
+        for format_id in FORMATS:
+            media_url = self._html_search_regex(
+                r'data-%s-file="(.+?)"' % format_id, webpage, 'url',
+                fatal=False)
+            if not media_url:
+                continue
+            match = re.search(r'/(?P<height>\d+)\.mp4', media_url)
+            formats.append({
+                'url': media_url,
+                'format_id': format_id,
+                'quality': quality(format_id),
+                'height': int(match.group('height')) if match else None,
+            })
+
+ uploader = self._html_search_regex(r'class="username".*>\s+(.+?)\n',
+ webpage, 'uploader', fatal=False)
+ uploader_url = ('https://www.clippituser.tv/p/' + uploader
+ if uploader else None)
+
+ timestamp = self._html_search_regex(r'datetime="(.+?)"',
+ webpage, 'date', fatal=False)
+ thumbnail = self._html_search_regex(r'data-image="(.+?)"',
+ webpage, 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'uploader': uploader,
+ 'uploader_url': uploader_url,
+ 'timestamp': parse_iso8601(timestamp),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/cliprs.py b/hypervideo_dl/extractor/cliprs.py
new file mode 100644
index 0000000..d55b26d
--- /dev/null
+++ b/hypervideo_dl/extractor/cliprs.py
@@ -0,0 +1,33 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .onet import OnetBaseIE
+
+
+class ClipRsIE(OnetBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
+ _TEST = {
+ 'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
+ 'md5': 'c412d57815ba07b56f9edc7b5d6a14e5',
+ 'info_dict': {
+ 'id': '1488842.1399140381',
+ 'ext': 'mp4',
+ 'title': 'PREMIJERA Frajle predstavljaju novi spot za pesmu Moli me, moli',
+ 'description': 'md5:56ce2c3b4ab31c5a2e0b17cb9a453026',
+ 'duration': 229,
+ 'timestamp': 1459850243,
+ 'upload_date': '20160405',
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ mvp_id = self._search_mvp_id(webpage)
+
+ info_dict = self._extract_from_id(mvp_id, webpage)
+ info_dict['display_id'] = display_id
+
+ return info_dict
diff --git a/hypervideo_dl/extractor/clipsyndicate.py b/hypervideo_dl/extractor/clipsyndicate.py
new file mode 100644
index 0000000..6cdb42f
--- /dev/null
+++ b/hypervideo_dl/extractor/clipsyndicate.py
@@ -0,0 +1,54 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ find_xpath_attr,
+    fix_xml_ampersands,
+)
+
+
+class ClipsyndicateIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
+ 'md5': '4d7d549451bad625e0ff3d7bd56d776c',
+ 'info_dict': {
+ 'id': '4629301',
+ 'ext': 'mp4',
+ 'title': 'Brick Briscoe',
+ 'duration': 612,
+ 'thumbnail': r're:^https?://.+\.jpg',
+ },
+ }, {
+ 'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ js_player = self._download_webpage(
+ 'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
+            video_id, 'Downloading player')
+        # the player JS embeds the flvars string, which includes a required token
+ flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars')
+
+ pdoc = self._download_xml(
+ 'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
+ video_id, 'Downloading video info',
+ transform_source=fix_xml_ampersands)
+
+ track_doc = pdoc.find('trackList/track')
+
+ def find_param(name):
+ node = find_xpath_attr(track_doc, './/param', 'name', name)
+ if node is not None:
+ return node.attrib['value']
+
+ return {
+ 'id': video_id,
+ 'title': find_param('title'),
+ 'url': track_doc.find('location').text,
+ 'thumbnail': find_param('thumbnail'),
+ 'duration': int(find_param('duration')),
+ }
diff --git a/hypervideo_dl/extractor/closertotruth.py b/hypervideo_dl/extractor/closertotruth.py
new file mode 100644
index 0000000..26243d5
--- /dev/null
+++ b/hypervideo_dl/extractor/closertotruth.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class CloserToTruthIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
+ 'info_dict': {
+ 'id': '0_zof1ktre',
+ 'display_id': 'solutions-the-mind-body-problem',
+ 'ext': 'mov',
+ 'title': 'Solutions to the Mind-Body Problem?',
+ 'upload_date': '20140221',
+ 'timestamp': 1392956007,
+ 'uploader_id': 'CTTXML'
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://closertotruth.com/episodes/how-do-brains-work',
+ 'info_dict': {
+ 'id': '0_iuxai6g6',
+ 'display_id': 'how-do-brains-work',
+ 'ext': 'mov',
+ 'title': 'How do Brains Work?',
+ 'upload_date': '20140221',
+ 'timestamp': 1392956024,
+ 'uploader_id': 'CTTXML'
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://closertotruth.com/interviews/1725',
+ 'info_dict': {
+ 'id': '1725',
+ 'title': 'AyaFr-002',
+ },
+ 'playlist_mincount': 2,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ partner_id = self._search_regex(
+ r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
+ webpage, 'kaltura partner_id')
+
+ title = self._search_regex(
+ r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')
+
+ select = self._search_regex(
+ r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
+ webpage, 'select version', default=None)
+ if select:
+ entry_ids = set()
+ entries = []
+ for mobj in re.finditer(
+ r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
+ webpage):
+ entry_id = mobj.group('id')
+ if entry_id in entry_ids:
+ continue
+ entry_ids.add(entry_id)
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': 'kaltura:%s:%s' % (partner_id, entry_id),
+ 'ie_key': 'Kaltura',
+ 'title': mobj.group('title'),
+ })
+ if entries:
+ return self.playlist_result(entries, display_id, title)
+
+ entry_id = self._search_regex(
+ r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
+ webpage, 'kaltura entry_id', group='id')
+
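+        # 'kaltura:<partner_id>:<entry_id>' is the internal URL scheme
+        # handled by the Kaltura extractor (hence ie_key 'Kaltura')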
+ return {
+ '_type': 'url_transparent',
+ 'display_id': display_id,
+ 'url': 'kaltura:%s:%s' % (partner_id, entry_id),
+ 'ie_key': 'Kaltura',
+ 'title': title
+ }
diff --git a/hypervideo_dl/extractor/cloudflarestream.py b/hypervideo_dl/extractor/cloudflarestream.py
new file mode 100644
index 0000000..2fdcfbb
--- /dev/null
+++ b/hypervideo_dl/extractor/cloudflarestream.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import re
+
+from .common import InfoExtractor
+
+
+class CloudflareStreamIE(InfoExtractor):
+ _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
+ _EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
+ _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:watch\.)?%s/|
+ %s
+ )
+ (?P<id>%s)
+ ''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
+ _TESTS = [{
+ 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
+ 'info_dict': {
+ 'id': '31c9291ab41fac05471db4e73aa11717',
+ 'ext': 'mp4',
+ 'title': '31c9291ab41fac05471db4e73aa11717',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s(?:%s).*?)\1' % (CloudflareStreamIE._EMBED_RE, CloudflareStreamIE._ID_RE),
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net'
+ base_url = 'https://%s/%s/' % (domain, video_id)
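+        # an ID containing dots looks like a signed JWT
+        # (header.payload.signature); the real video ID is carried in the
+        # payload's 'sub' claim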
+ if '.' in video_id:
+ video_id = self._parse_json(base64.urlsafe_b64decode(
+ video_id.split('.')[1]), video_id)['sub']
+ manifest_base_url = base_url + 'manifest/video.'
+
+ formats = self._extract_m3u8_formats(
+ manifest_base_url + 'm3u8', video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(self._extract_mpd_formats(
+ manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'thumbnail': base_url + 'thumbnails/thumbnail.jpg',
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/cloudy.py b/hypervideo_dl/extractor/cloudy.py
new file mode 100644
index 0000000..85ca20e
--- /dev/null
+++ b/hypervideo_dl/extractor/cloudy.py
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ str_to_int,
+ unified_strdate,
+)
+
+
+class CloudyIE(InfoExtractor):
+    IE_DESC = 'cloudy.ec'
+ _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.cloudy.ec/v/af511e2527aac',
+ 'md5': '29832b05028ead1b58be86bf319397ca',
+ 'info_dict': {
+ 'id': 'af511e2527aac',
+ 'ext': 'mp4',
+ 'title': 'Funny Cats and Animals Compilation june 2013',
+ 'upload_date': '20130913',
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://www.cloudy.ec/embed.php', video_id, query={
+ 'id': video_id,
+ 'playerPage': 1,
+ 'autoplay': 1,
+ })
+
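+        # the embed page yields the playable formats; the regular /v/ page,
+        # fetched next (non-fatal), only supplies extra metadata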
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+
+ webpage = self._download_webpage(
+ 'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False)
+
+ if webpage:
+ info.update({
+ 'title': self._search_regex(
+ r'<h\d[^>]*>([^<]+)<', webpage, 'title'),
+ 'upload_date': unified_strdate(self._search_regex(
+ r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage,
+ 'upload date', fatal=False)),
+ 'view_count': str_to_int(self._search_regex(
+ r'([\d,.]+) views<', webpage, 'view count', fatal=False)),
+ })
+
+ if not info.get('title'):
+ info['title'] = video_id
+
+ info['id'] = video_id
+
+ return info
diff --git a/hypervideo_dl/extractor/clubic.py b/hypervideo_dl/extractor/clubic.py
new file mode 100644
index 0000000..98f9cb5
--- /dev/null
+++ b/hypervideo_dl/extractor/clubic.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ qualities,
+)
+
+
+class ClubicIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
+ 'md5': '1592b694ba586036efac1776b0b43cd3',
+ 'info_dict': {
+ 'id': '448474',
+ 'ext': 'mp4',
+ 'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité',
+ 'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*',
+ 'thumbnail': r're:^http://img\.clubic\.com/.*\.jpg$',
+ }
+ }, {
+ 'url': 'http://www.clubic.com/video/video-clubic-week-2-0-apple-iphone-6s-et-plus-mais-surtout-le-pencil-469792.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
+ player_page = self._download_webpage(player_url, video_id)
+
+ config = self._parse_json(self._search_regex(
+ r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
+ 'configuration'), video_id)
+
+ video_info = config['videoInfo']
+ sources = config['sources']
+ quality_order = qualities(['sd', 'hq'])
+
+ formats = [{
+ 'format_id': src['streamQuality'],
+ 'url': src['src'],
+ 'quality': quality_order(src['streamQuality']),
+ } for src in sources]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_info['title'],
+ 'formats': formats,
+ 'description': clean_html(video_info.get('description')),
+ 'thumbnail': config.get('poster'),
+ }
diff --git a/hypervideo_dl/extractor/clyp.py b/hypervideo_dl/extractor/clyp.py
new file mode 100644
index 0000000..06d04de
--- /dev/null
+++ b/hypervideo_dl/extractor/clyp.py
@@ -0,0 +1,82 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ float_or_none,
+ unified_timestamp,
+)
+
+
+class ClypIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://clyp.it/ojz2wfah',
+ 'md5': '1d4961036c41247ecfdcc439c0cddcbb',
+ 'info_dict': {
+ 'id': 'ojz2wfah',
+ 'ext': 'mp3',
+ 'title': 'Krisson80 - bits wip wip',
+ 'description': '#Krisson80BitsWipWip #chiptune\n#wip',
+ 'duration': 263.21,
+ 'timestamp': 1443515251,
+ 'upload_date': '20150929',
+ },
+ }, {
+ 'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d',
+ 'info_dict': {
+ 'id': 'b04p1odi',
+ 'ext': 'mp3',
+ 'title': 'GJ! (Reward Edit)',
+ 'description': 'Metal Resistance (THE ONE edition)',
+ 'duration': 177.789,
+ 'timestamp': 1528241278,
+ 'upload_date': '20180605',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ token = qs.get('token', [None])[0]
+
+ query = {}
+ if token:
+ query['token'] = token
+
+ metadata = self._download_json(
+ 'https://api.clyp.it/%s' % audio_id, audio_id, query=query)
+
+ formats = []
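+        # the API may expose up to four direct URLs (Mp3Url, OggUrl,
+        # SecureMp3Url, SecureOggUrl); collect whichever are present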
+ for secure in ('', 'Secure'):
+ for ext in ('Ogg', 'Mp3'):
+ format_id = '%s%s' % (secure, ext)
+ format_url = metadata.get('%sUrl' % format_id)
+ if format_url:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'vcodec': 'none',
+ })
+ self._sort_formats(formats)
+
+ title = metadata['Title']
+ description = metadata.get('Description')
+ duration = float_or_none(metadata.get('Duration'))
+ timestamp = unified_timestamp(metadata.get('DateCreated'))
+
+ return {
+ 'id': audio_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/cmt.py b/hypervideo_dl/extractor/cmt.py
new file mode 100644
index 0000000..e701fbe
--- /dev/null
+++ b/hypervideo_dl/extractor/cmt.py
@@ -0,0 +1,54 @@
+from __future__ import unicode_literals
+
+from .mtv import MTVIE
+
+
+class CMTIE(MTVIE):
+ IE_NAME = 'cmt.com'
+ _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
+ 'md5': 'e6b7ef3c4c45bbfae88061799bbba6c2',
+ 'info_dict': {
+ 'id': '989124',
+ 'ext': 'mp4',
+ 'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
+ 'description': 'Blame It All On My Roots',
+ },
+ 'skip': 'Video not available',
+ }, {
+ 'url': 'http://www.cmt.com/videos/misc/1504699/still-the-king-ep-109-in-3-minutes.jhtml#id=1739908',
+ 'md5': 'e61a801ca4a183a466c08bd98dccbb1c',
+ 'info_dict': {
+ 'id': '1504699',
+ 'ext': 'mp4',
+ 'title': 'Still The King Ep. 109 in 3 Minutes',
+ 'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9.',
+ 'timestamp': 1469421000.0,
+ 'upload_date': '20160725',
+ },
+ }, {
+ 'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.cmt.com/full-episodes/537qb3/nashville-the-wayfaring-stranger-season-5-ep-501',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.cmt.com/video-clips/t9e4ci/nashville-juliette-in-2-minutes',
+ 'only_matching': True,
+ }]
+
+ def _extract_mgid(self, webpage):
+ mgid = self._search_regex(
+ r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1',
+ webpage, 'mgid', group='mgid', default=None)
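+        # some pages inline the mgid in MTVN.VIDEO.contentUri; others only
+        # expose it through the "triforce" page data, so fall back to that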
+ if not mgid:
+ mgid = self._extract_triforce_mgid(webpage)
+ return mgid
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ mgid = self._extract_mgid(webpage)
+ return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
diff --git a/hypervideo_dl/extractor/cnbc.py b/hypervideo_dl/extractor/cnbc.py
new file mode 100644
index 0000000..7b9f453
--- /dev/null
+++ b/hypervideo_dl/extractor/cnbc.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class CNBCIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://video.cnbc.com/gallery/?video=3000503714',
+ 'info_dict': {
+ 'id': '3000503714',
+ 'ext': 'mp4',
+ 'title': 'Fighting zombies is big business',
+ 'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
+ 'timestamp': 1459332000,
+ 'upload_date': '20160330',
+ 'uploader': 'NBCU-CNBC',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'url': smuggle_url(
+ 'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id,
+ {'force_smil_url': True}),
+ 'id': video_id,
+ }
+
+
+class CNBCVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
+ _TEST = {
+ 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
+ 'info_dict': {
+ 'id': '7000031301',
+ 'ext': 'mp4',
+ 'title': "Trump: I don't necessarily agree with raising rates",
+ 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
+ 'timestamp': 1531958400,
+ 'upload_date': '20180719',
+ 'uploader': 'NBCU-CNBC',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ path, display_id = re.match(self._VALID_URL, url).groups()
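+        # resolve the article path to its numeric video ID (vcpsId) via
+        # CNBC's GraphQL endpoint, then hand off to CNBCIE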
+ video_id = self._download_json(
+ 'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
+ 'query': '''{
+ page(path: "%s") {
+ vcpsId
+ }
+}''' % path,
+ })['data']['page']['vcpsId']
+ return self.url_result(
+ 'http://video.cnbc.com/gallery/?video=%d' % video_id,
+ CNBCIE.ie_key())
diff --git a/hypervideo_dl/extractor/cnn.py b/hypervideo_dl/extractor/cnn.py
new file mode 100644
index 0000000..2d950fa
--- /dev/null
+++ b/hypervideo_dl/extractor/cnn.py
@@ -0,0 +1,147 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .turner import TurnerBaseIE
+from ..utils import url_basename
+
+
+class CNNIE(TurnerBaseIE):
+ _VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
+ (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
+
+ _TESTS = [{
+ 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
+ 'md5': '3e6121ea48df7e2259fe73a0628605c4',
+ 'info_dict': {
+ 'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
+ 'ext': 'mp4',
+ 'title': 'Nadal wins 8th French Open title',
+ 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
+ 'duration': 135,
+ 'upload_date': '20130609',
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
+ 'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
+ 'info_dict': {
+ 'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
+ 'ext': 'mp4',
+ 'title': "Student's epic speech stuns new freshmen",
+ 'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
+ 'upload_date': '20130821',
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
+ 'md5': 'f14d02ebd264df951feb2400e2c25a1b',
+ 'info_dict': {
+ 'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
+ 'ext': 'mp4',
+ 'title': 'Nashville Ep. 1: Hand crafted skateboards',
+ 'description': 'md5:e7223a503315c9f150acac52e76de086',
+ 'upload_date': '20141222',
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
+ 'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
+ 'info_dict': {
+ 'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
+ 'ext': 'mp4',
+ 'title': '5 stunning stats about Netflix',
+ 'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
+ 'upload_date': '20160819',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
+ 'only_matching': True,
+ }]
+
+ _CONFIG = {
+ # http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
+ 'edition': {
+ 'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
+ 'media_src': 'http://pmd.cdn.turner.com/cnn/big',
+ },
+ # http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
+ 'money': {
+ 'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
+ 'media_src': 'http://ht3.cdn.turner.com/money/big',
+ },
+ }
+
+ def _extract_timestamp(self, video_data):
+ # TODO: fix timestamp extraction
+ return None
+
+ def _real_extract(self, url):
+ sub_domain, path, page_title = re.match(self._VALID_URL, url).groups()
+ if sub_domain not in ('money', 'edition'):
+ sub_domain = 'edition'
+ config = self._CONFIG[sub_domain]
+ return self._extract_cvp_info(
+ config['data_src'] % path, page_title, {
+ 'default': {
+ 'media_src': config['media_src'],
+ },
+ 'f4m': {
+ 'host': 'cnn-vh.akamaihd.net',
+ },
+ })
+
+
+class CNNBlogsIE(InfoExtractor):
+ _VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
+ _TEST = {
+ 'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
+ 'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
+ 'info_dict': {
+ 'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
+ 'ext': 'mp4',
+ 'title': 'Criminalizing journalism?',
+ 'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
+ 'upload_date': '20140209',
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ 'add_ie': ['CNN'],
+ }
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, url_basename(url))
+ cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
+ return self.url_result(cnn_url, CNNIE.ie_key())
+
+
+class CNNArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
+ _TEST = {
+ 'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
+ 'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
+ 'info_dict': {
+ 'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
+ 'ext': 'mp4',
+ 'title': 'Obama: Cyberattack not an act of war',
+ 'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
+ 'upload_date': '20141221',
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ 'add_ie': ['CNN'],
+ }
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, url_basename(url))
+ cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
+ return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
diff --git a/hypervideo_dl/extractor/comedycentral.py b/hypervideo_dl/extractor/comedycentral.py
new file mode 100644
index 0000000..1bfa912
--- /dev/null
+++ b/hypervideo_dl/extractor/comedycentral.py
@@ -0,0 +1,51 @@
+from __future__ import unicode_literals
+
+from .mtv import MTVServicesInfoExtractor
+
+
+class ComedyCentralIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
+ _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
+
+ _TESTS = [{
+ 'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
+ 'md5': 'b8acb347177c680ff18a292aa2166f80',
+ 'info_dict': {
+ 'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
+ 'ext': 'mp4',
+ 'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
+ 'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
+ 'timestamp': 1598670000,
+ 'upload_date': '20200829',
+ },
+ }, {
+ 'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
+ 'only_matching': True,
+ }]
+
+
+class ComedyCentralTVIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
+ _TESTS = [{
+ 'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
+ 'info_dict': {
+ 'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'Josh Investigates',
+ 'description': 'Steht uns das Ende der Welt bevor?',
+ },
+ }]
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+ _GEO_COUNTRIES = ['DE']
+
+ def _get_feed_query(self, uri):
+ return {
+ 'accountOverride': 'intl.mtvi.com',
+ 'arcEp': 'web.cc.tv',
+ 'ep': 'b9032c3a',
+ 'imageEp': 'web.cc.tv',
+ 'mgid': uri,
+ }
diff --git a/hypervideo_dl/extractor/common.py b/hypervideo_dl/extractor/common.py
new file mode 100644
index 0000000..8b622be
--- /dev/null
+++ b/hypervideo_dl/extractor/common.py
@@ -0,0 +1,3064 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import datetime
+import hashlib
+import json
+import netrc
+import os
+import random
+import re
+import socket
+import ssl
+import sys
+import time
+import math
+
+from ..compat import (
+ compat_cookiejar_Cookie,
+ compat_cookies_SimpleCookie,
+ compat_etree_Element,
+ compat_etree_fromstring,
+ compat_getpass,
+ compat_integer_types,
+ compat_http_client,
+ compat_os_name,
+ compat_str,
+ compat_urllib_error,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlencode,
+ compat_urllib_request,
+ compat_urlparse,
+ compat_xml_parse_error,
+)
+from ..downloader.f4m import (
+ get_base_url,
+ remove_encrypted_media,
+)
+from ..utils import (
+ NO_DEFAULT,
+ age_restricted,
+ base_url,
+ bug_reports_message,
+ clean_html,
+ compiled_regex_type,
+ determine_ext,
+ determine_protocol,
+ dict_get,
+ error_to_compat_str,
+ ExtractorError,
+ extract_attributes,
+ fix_xml_ampersands,
+ float_or_none,
+ GeoRestrictedError,
+ GeoUtils,
+ int_or_none,
+ js_to_json,
+ JSON_LD_RE,
+ mimetype2ext,
+ orderedSet,
+ parse_bitrate,
+ parse_codecs,
+ parse_duration,
+ parse_iso8601,
+ parse_m3u8_attributes,
+ parse_resolution,
+ RegexNotFoundError,
+ sanitized_Request,
+ sanitize_filename,
+ str_or_none,
+ str_to_int,
+ strip_or_none,
+ unescapeHTML,
+ unified_strdate,
+ unified_timestamp,
+ update_Request,
+ update_url_query,
+ urljoin,
+ url_basename,
+ url_or_none,
+ xpath_element,
+ xpath_text,
+ xpath_with_ns,
+)
+
+
+class InfoExtractor(object):
+ """Information Extractor class.
+
+ Information extractors are the classes that, given a URL, extract
+ information about the video (or videos) the URL refers to. This
+ information includes the real video URL, the video title, the author
+ and others. The information is stored in a dictionary which is then
+ passed to the YoutubeDL. The YoutubeDL processes this
+ information, possibly downloading the video to the file system, among
+ other possible outcomes.
+
+ The type field determines the type of the result.
+ By far the most common value (and the default if _type is missing) is
+ "video", which indicates a single video.
+
+ For a video, the dictionaries must include the following fields:
+
+ id: Video identifier.
+ title: Video title, unescaped.
+
+ Additionally, it must contain either a formats entry or a url one:
+
+ formats: A list of dictionaries for each format available, ordered
+ from worst to best quality.
+
+ Potential fields:
+ * url The mandatory URL representing the media:
+ for plain file media - HTTP URL of this file,
+ for RTMP - RTMP URL,
+ for HLS - URL of the M3U8 media playlist,
+ for HDS - URL of the F4M manifest,
+ for DASH
+ - HTTP URL to plain file media (in case of
+ unfragmented media)
+ - URL of the MPD manifest or base URL
+ representing the media if MPD manifest
+ is parsed from a string (in case of
+ fragmented media)
+ for MSS - URL of the ISM manifest.
+ * manifest_url
+ The URL of the manifest file in case of
+ fragmented media:
+ for HLS - URL of the M3U8 master playlist,
+ for HDS - URL of the F4M manifest,
+ for DASH - URL of the MPD manifest,
+ for MSS - URL of the ISM manifest.
+ * ext Will be calculated from URL if missing
+ * format A human-readable description of the format
+ ("mp4 container with h264/opus").
+ Calculated from the format_id, width, height,
+ and format_note fields if missing.
+ * format_id A short description of the format
+ ("mp4_h264_opus" or "19").
+ Technically optional, but strongly recommended.
+ * format_note Additional info about the format
+ ("3D" or "DASH video")
+ * width Width of the video, if known
+ * height Height of the video, if known
+ * resolution Textual description of width and height
+ * tbr Average bitrate of audio and video in KBit/s
+ * abr Average audio bitrate in KBit/s
+ * acodec Name of the audio codec in use
+ * asr Audio sampling rate in Hertz
+ * vbr Average video bitrate in KBit/s
+ * fps Frame rate
+ * vcodec Name of the video codec in use
+ * container Name of the container format
+ * filesize The number of bytes, if known in advance
+ * filesize_approx An estimate for the number of bytes
+ * player_url SWF Player URL (used for rtmpdump).
+ * protocol The protocol that will be used for the actual
+ download, lower-case.
+ "http", "https", "rtsp", "rtmp", "rtmpe",
+ "m3u8", "m3u8_native" or "http_dash_segments".
+ * fragment_base_url
+ Base URL for fragments. Each fragment's path
+ value (if present) will be relative to
+ this URL.
+ * fragments A list of fragments of a fragmented media.
+ Each fragment entry must contain either a url
+ or a path. If a url is present, it should be
+ used by the client. Otherwise both path and
+ fragment_base_url must be present. Here is
+ the list of all potential fields:
+ * "url" - fragment's URL
+ * "path" - fragment's path relative to
+ fragment_base_url
+ * "duration" (optional, int or float)
+ * "filesize" (optional, int)
+ * preference Order number of this format. If this field is
+ present and not None, the formats get sorted
+ by this field, regardless of all other values.
+ -1 for default (order by other properties),
+ -2 or smaller for less than default.
+ < -1000 to hide the format (if there is
+ another one which is strictly better)
+ * language Language code, e.g. "de" or "en-US".
+ * language_preference Is this in the language mentioned in
+ the URL?
+ 10 if it's what the URL is about,
+ -1 for default (don't know),
+ -10 otherwise, other values reserved for now.
+ * quality Order number of the video quality of this
+ format, irrespective of the file format.
+ -1 for default (order by other properties),
+ -2 or smaller for less than default.
+ * source_preference Order number for this video source
+ (quality takes higher priority)
+ -1 for default (order by other properties),
+ -2 or smaller for less than default.
+ * http_headers A dictionary of additional HTTP headers
+ to add to the request.
+ * stretched_ratio If given and not 1, indicates that the
+ video's pixels are not square.
+ width : height ratio as float.
+ * no_resume The server does not support resuming the
+ (HTTP or RTMP) download. Boolean.
+ * downloader_options A dictionary of downloader options as
+ described in FileDownloader
+
+ url: Final video URL.
+ ext: Video filename extension.
+ format: The video format, defaults to ext (used for --get-format)
+ player_url: SWF Player URL (used for rtmpdump).
+
+ The following fields are optional:
+
+ alt_title: A secondary title of the video.
+ display_id: An alternative identifier for the video, not necessarily
+ unique, but available before title. Typically, id is
+ something like "4234987", title "Dancing naked mole rats",
+ and display_id "dancing-naked-mole-rats".
+ thumbnails: A list of dictionaries, with the following entries:
+ * "id" (optional, string) - Thumbnail format ID
+ * "url"
+ * "preference" (optional, int) - quality of the image
+ * "width" (optional, int)
+ * "height" (optional, int)
+ * "resolution" (optional, string "{width}x{height}",
+ deprecated)
+ * "filesize" (optional, int)
+ thumbnail: Full URL to a video thumbnail image.
+ description: Full video description.
+ uploader: Full name of the video uploader.
+ license: License name the video is licensed under.
+ creator: The creator of the video.
+ release_timestamp: UNIX timestamp of the moment the video was released.
+ release_date: The date (YYYYMMDD) when the video was released.
+ timestamp: UNIX timestamp of the moment the video became available
+ (uploaded).
+ upload_date: Video upload date (YYYYMMDD).
+ If not explicitly set, calculated from timestamp.
+ uploader_id: Nickname or id of the video uploader.
+ uploader_url: Full URL to a personal webpage of the video uploader.
+ channel: Full name of the channel the video is uploaded on.
+ Note that channel fields may or may not repeat uploader
+ fields. This depends on a particular extractor.
+ channel_id: Id of the channel.
+ channel_url: Full URL to a channel webpage.
+ location: Physical location where the video was filmed.
+ subtitles: The available subtitles as a dictionary in the format
+ {tag: subformats}. "tag" is usually a language code, and
+ "subformats" is a list sorted from lower to higher
+ preference, each element is a dictionary with the "ext"
+ entry and one of:
+ * "data": The subtitles file contents
+ * "url": A URL pointing to the subtitles file
+ "ext" will be calculated from URL if missing
+ automatic_captions: Like 'subtitles', used by the YoutubeIE for
+ automatically generated captions
+ duration: Length of the video in seconds, as an integer or float.
+ view_count: How many users have watched the video on the platform.
+ like_count: Number of positive ratings of the video
+ dislike_count: Number of negative ratings of the video
+ repost_count: Number of reposts of the video
+ average_rating: Average rating given by users, the scale used depends on the webpage
+ comment_count: Number of comments on the video
+ comments: A list of comments, each with one or more of the following
+ properties (all optional, but at least one of
+ "text" or "html" must be present):
+ * "author" - human-readable name of the comment author
+ * "author_id" - user ID of the comment author
+ * "id" - Comment ID
+ * "html" - Comment as HTML
+ * "text" - Plain text of the comment
+ * "timestamp" - UNIX timestamp of comment
+ * "parent" - ID of the comment this one is replying to.
+ Set to "root" to indicate that this is a
+ comment to the original video.
+ age_limit: Age restriction for the video, as an integer (years)
+ webpage_url: The URL to the video webpage; if given to hypervideo, it
+ should allow getting the same result again. (It will be set
+ by YoutubeDL if it's missing)
+ categories: A list of categories that the video falls in, for example
+ ["Sports", "Berlin"]
+ tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
+ is_live: True, False, or None (=unknown). Whether this video is a
+ live stream that goes on instead of a fixed-length video.
+ start_time: Time in seconds where the reproduction should start, as
+ specified in the URL.
+ end_time: Time in seconds where the reproduction should end, as
+ specified in the URL.
+ chapters: A list of dictionaries, with the following entries:
+ * "start_time" - The start time of the chapter in seconds
+ * "end_time" - The end time of the chapter in seconds
+ * "title" (optional, string)
+
+ The following fields should only be used when the video belongs to some logical
+ chapter or section:
+
+ chapter: Name or title of the chapter the video belongs to.
+ chapter_number: Number of the chapter the video belongs to, as an integer.
+ chapter_id: Id of the chapter the video belongs to, as a unicode string.
+
+ The following fields should only be used when the video is an episode of some
+ series, programme or podcast:
+
+ series: Title of the series or programme the video episode belongs to.
+ season: Title of the season the video episode belongs to.
+ season_number: Number of the season the video episode belongs to, as an integer.
+ season_id: Id of the season the video episode belongs to, as a unicode string.
+ episode: Title of the video episode. Unlike mandatory video title field,
+ this field should denote the exact title of the video episode
+ without any kind of decoration.
+ episode_number: Number of the video episode within a season, as an integer.
+ episode_id: Id of the video episode, as a unicode string.
+
+ The following fields should only be used when the media is a track or a part of
+ a music album:
+
+ track: Title of the track.
+ track_number: Number of the track within an album or a disc, as an integer.
+ track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
+ as a unicode string.
+ artist: Artist(s) of the track.
+ genre: Genre(s) of the track.
+ album: Title of the album the track belongs to.
+ album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
+ album_artist: List of all artists that appeared on the album (e.g.
+ "Ash Borer / Fell Voices" or "Various Artists", useful for splits
+ and compilations).
+ disc_number: Number of the disc or other physical medium the track belongs to,
+ as an integer.
+ release_year: Year (YYYY) when the album was released.
+
+ Unless mentioned otherwise, the fields should be Unicode strings.
+
+ Unless mentioned otherwise, None is equivalent to absence of information.
+
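+ For illustration only, a minimal "video" result could look like this
+ (all field values below are hypothetical):
+
+ {
+ 'id': '12345',
+ 'title': 'Some video title',
+ 'url': 'https://example.com/video.mp4',
+ 'ext': 'mp4',
+ }
+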
+
+ _type "playlist" indicates multiple videos.
+ There must be a key "entries", which is a list, an iterable, or a PagedList
+ object, each element of which is a valid dictionary by this specification.
+
+ Additionally, playlists can have "id", "title", "description", "uploader",
+ "uploader_id", "uploader_url", "duration" attributes with the same semantics
+ as videos (see above).
+
+
+ _type "multi_video" indicates that there are multiple videos that
+ form a single show, for example multiple acts of an opera or TV episode.
+ It must have an entries key like a playlist and contain all the keys
+ required for a video at the same time.
+
+
+ _type "url" indicates that the video must be extracted from another
+ location, possibly by a different extractor. Its only required key is:
+ "url" - the next URL to extract.
+ The key "ie_key" can be set to the class name (minus the trailing "IE",
+ e.g. "Youtube") if the extractor class is known in advance.
+ Additionally, the dictionary may have any properties of the resolved entity
+ known in advance, for example "title" if the title of the referred video is
+ known ahead of time.
+
+
+ _type "url_transparent" entities have the same specification as "url", but
+ indicate that the given additional information is more precise than the one
+ associated with the resolved URL.
+ This is useful when a site employs a video service that hosts the video and
+ its technical metadata, but that video service does not embed a useful
+ title, description etc.
+
+
+ Subclasses of this one should re-define the _real_initialize() and
+ _real_extract() methods and define a _VALID_URL regexp.
+ In most cases, they should also be added to the list of extractors.
+
+ _GEO_BYPASS attribute may be set to False in order to disable
+ geo restriction bypass mechanisms for a particular extractor.
+ Though it won't disable explicit geo restriction bypass based on
+ country code provided with geo_bypass_country.
+
+ _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
+ countries for this extractor. One of these countries will be used by
+ the geo restriction bypass mechanism right away in order to bypass
+ geo restriction, provided the mechanism is not disabled.
+
+ _GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted
+ IP blocks in CIDR notation for this extractor. One of these IP blocks
+ will be used by geo restriction bypass mechanism similarly
+ to _GEO_COUNTRIES.
+
+ Finally, the _WORKING attribute should be set to False for broken IEs
+ in order to warn the users and skip the tests.
+ """
+
+ _ready = False
+ _downloader = None
+ _x_forwarded_for_ip = None
+ _GEO_BYPASS = True
+ _GEO_COUNTRIES = None
+ _GEO_IP_BLOCKS = None
+ _WORKING = True
+
+ def __init__(self, downloader=None):
+ """Constructor. Receives an optional downloader."""
+ self._ready = False
+ self._x_forwarded_for_ip = None
+ self.set_downloader(downloader)
+
+ @classmethod
+ def suitable(cls, url):
+ """Receives a URL and returns True if suitable for this IE."""
+
+ # This does not use has/getattr intentionally - we want to know whether
+ # we have cached the regexp for *this* class, whereas getattr would also
+ # match the superclass
+ if '_VALID_URL_RE' not in cls.__dict__:
+ cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+ return cls._VALID_URL_RE.match(url) is not None
+
+ @classmethod
+ def _match_id(cls, url):
+ if '_VALID_URL_RE' not in cls.__dict__:
+ cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+ m = cls._VALID_URL_RE.match(url)
+ assert m
+ return compat_str(m.group('id'))
+
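+ # Illustrative sketch (hypothetical extractor): subclasses define a
+ # _VALID_URL containing an "id" group, e.g.
+ #
+ # class ExampleIE(InfoExtractor):
+ # _VALID_URL = r'https?://(?:www\.)?example\.com/watch/(?P<id>[0-9]+)'
+ #
+ # so that suitable('https://example.com/watch/42') returns True and
+ # _match_id('https://example.com/watch/42') returns '42'.
+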
+ @classmethod
+ def working(cls):
+ """Getter method for _WORKING."""
+ return cls._WORKING
+
+ def initialize(self):
+ """Initializes an instance (authentication, etc)."""
+ self._initialize_geo_bypass({
+ 'countries': self._GEO_COUNTRIES,
+ 'ip_blocks': self._GEO_IP_BLOCKS,
+ })
+ if not self._ready:
+ self._real_initialize()
+ self._ready = True
+
+ def _initialize_geo_bypass(self, geo_bypass_context):
+ """
+ Initialize geo restriction bypass mechanism.
+
+ This method is used to initialize the geo bypass mechanism based on
+ faking the X-Forwarded-For HTTP header. A random country from the
+ provided country list is selected and a random IP belonging to this
+ country is generated. This IP will be passed as the X-Forwarded-For
+ HTTP header in all subsequent HTTP requests.
+
+ This method is called during instance initialization to set up the
+ geo bypass mechanism with _GEO_COUNTRIES and
+ _GEO_IP_BLOCKS.
+
+ You may also call it manually from an extractor's code if geo bypass
+ information is not available beforehand (e.g. it is obtained during
+ extraction) or for some other reason. In this case you should pass
+ this information in the geo bypass context given as the first
+ argument. It may contain the following fields:
+
+ countries: List of geo unrestricted countries (similar
+ to _GEO_COUNTRIES)
+ ip_blocks: List of geo unrestricted IP blocks in CIDR notation
+ (similar to _GEO_IP_BLOCKS)
+
+ """
+ if not self._x_forwarded_for_ip:
+
+ # Geo bypass mechanism is explicitly disabled by user
+ if not self._downloader.params.get('geo_bypass', True):
+ return
+
+ if not geo_bypass_context:
+ geo_bypass_context = {}
+
+ # Backward compatibility: previously _initialize_geo_bypass
+ # expected a list of countries, some 3rd party code may still use
+ # it this way
+ if isinstance(geo_bypass_context, (list, tuple)):
+ geo_bypass_context = {
+ 'countries': geo_bypass_context,
+ }
+
+ # The whole point of geo bypass mechanism is to fake IP
+ # as X-Forwarded-For HTTP header based on some IP block or
+ # country code.
+
+ # Path 1: bypassing based on IP block in CIDR notation
+
+ # Explicit IP block specified by user, use it right away
+ # regardless of whether extractor is geo bypassable or not
+ ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
+
+ # Otherwise use random IP block from geo bypass context but only
+ # if extractor is known as geo bypassable
+ if not ip_block:
+ ip_blocks = geo_bypass_context.get('ip_blocks')
+ if self._GEO_BYPASS and ip_blocks:
+ ip_block = random.choice(ip_blocks)
+
+ if ip_block:
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen(
+ '[debug] Using fake IP %s as X-Forwarded-For.'
+ % self._x_forwarded_for_ip)
+ return
+
+ # Path 2: bypassing based on country code
+
+ # Explicit country code specified by user, use it right away
+ # regardless of whether extractor is geo bypassable or not
+ country = self._downloader.params.get('geo_bypass_country', None)
+
+ # Otherwise use random country code from geo bypass context but
+ # only if extractor is known as geo bypassable
+ if not country:
+ countries = geo_bypass_context.get('countries')
+ if self._GEO_BYPASS and countries:
+ country = random.choice(countries)
+
+ if country:
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen(
+ '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
+ % (self._x_forwarded_for_ip, country.upper()))
+
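+ # For illustration, an extractor that only learns geo restriction
+ # details during extraction could re-run the bypass setup manually
+ # (country codes below are hypothetical):
+ #
+ # self._initialize_geo_bypass({
+ # 'countries': ['US', 'GB'],
+ # })
+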
+ def extract(self, url):
+ """Extracts URL information and returns it in list of dicts."""
+ try:
+ for _ in range(2):
+ try:
+ self.initialize()
+ ie_result = self._real_extract(url)
+ if self._x_forwarded_for_ip:
+ ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
+ return ie_result
+ except GeoRestrictedError as e:
+ if self.__maybe_fake_ip_and_retry(e.countries):
+ continue
+ raise
+ except ExtractorError:
+ raise
+ except compat_http_client.IncompleteRead as e:
+ raise ExtractorError('A network error has occurred.', cause=e, expected=True)
+ except (KeyError, StopIteration) as e:
+ raise ExtractorError('An extractor error has occurred.', cause=e)
+
+ def __maybe_fake_ip_and_retry(self, countries):
+ if (not self._downloader.params.get('geo_bypass_country', None)
+ and self._GEO_BYPASS
+ and self._downloader.params.get('geo_bypass', True)
+ and not self._x_forwarded_for_ip
+ and countries):
+ country_code = random.choice(countries)
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+ if self._x_forwarded_for_ip:
+ self.report_warning(
+ 'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
+ % (self._x_forwarded_for_ip, country_code.upper()))
+ return True
+ return False
+
+ def set_downloader(self, downloader):
+ """Sets the downloader for this IE."""
+ self._downloader = downloader
+
+ def _real_initialize(self):
+ """Real initialization process. Redefine in subclasses."""
+ pass
+
+ def _real_extract(self, url):
+ """Real extraction process. Redefine in subclasses."""
+ pass
+
+ @classmethod
+ def ie_key(cls):
+ """A string for getting the InfoExtractor with get_info_extractor"""
+ return compat_str(cls.__name__[:-2])
+
+ @property
+ def IE_NAME(self):
+ return compat_str(type(self).__name__[:-2])
+
+ @staticmethod
+ def __can_accept_status_code(err, expected_status):
+ assert isinstance(err, compat_urllib_error.HTTPError)
+ if expected_status is None:
+ return False
+ if isinstance(expected_status, compat_integer_types):
+ return err.code == expected_status
+ elif isinstance(expected_status, (list, tuple)):
+ return err.code in expected_status
+ elif callable(expected_status):
+ return expected_status(err.code) is True
+ else:
+ assert False
+
+ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
+ """
+ Return the response handle.
+
+ See _download_webpage docstring for arguments specification.
+ """
+ if note is None:
+ self.report_download_webpage(video_id)
+ elif note is not False:
+ if video_id is None:
+ self.to_screen('%s' % (note,))
+ else:
+ self.to_screen('%s: %s' % (video_id, note))
+
+ # Some sites check the X-Forwarded-For HTTP header in order to figure
+ # out the origin of the client behind a proxy. This allows bypassing
+ # geo restriction by faking this header's value with an IP that belongs
+ # to some geo unrestricted country. We will do so once we encounter any
+ # geo restriction error.
+ if self._x_forwarded_for_ip:
+ if 'X-Forwarded-For' not in headers:
+ headers['X-Forwarded-For'] = self._x_forwarded_for_ip
+
+ if isinstance(url_or_request, compat_urllib_request.Request):
+ url_or_request = update_Request(
+ url_or_request, data=data, headers=headers, query=query)
+ else:
+ if query:
+ url_or_request = update_url_query(url_or_request, query)
+ if data is not None or headers:
+ url_or_request = sanitized_Request(url_or_request, data, headers)
+ exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
+ if hasattr(ssl, 'CertificateError'):
+ exceptions.append(ssl.CertificateError)
+ try:
+ return self._downloader.urlopen(url_or_request)
+ except tuple(exceptions) as err:
+ if isinstance(err, compat_urllib_error.HTTPError):
+ if self.__can_accept_status_code(err, expected_status):
+ # Retain reference to error to prevent file object from
+ # being closed before it can be read. Works around the
+ # effects of <https://bugs.python.org/issue15002>
+ # introduced in Python 3.4.1.
+ err.fp._error = err
+ return err.fp
+
+ if errnote is False:
+ return False
+ if errnote is None:
+ errnote = 'Unable to download webpage'
+
+ errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
+ if fatal:
+ raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
+ else:
+ self._downloader.report_warning(errmsg)
+ return False
+
+ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ """
+ Return a tuple (page content as string, URL handle).
+
+ See _download_webpage docstring for arguments specification.
+ """
+ # Strip hashes from the URL (#1038)
+ if isinstance(url_or_request, (compat_str, str)):
+ url_or_request = url_or_request.partition('#')[0]
+
+ urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
+ if urlh is False:
+ assert not fatal
+ return False
+ content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
+ return (content, urlh)
+
+ @staticmethod
+ def _guess_encoding_from_content(content_type, webpage_bytes):
+ m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
+ if m:
+ encoding = m.group(1)
+ else:
+ m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
+ webpage_bytes[:1024])
+ if m:
+ encoding = m.group(1).decode('ascii')
+ elif webpage_bytes.startswith(b'\xff\xfe'):
+ encoding = 'utf-16'
+ else:
+ encoding = 'utf-8'
+
+ return encoding
+
+ def __check_blocked(self, content):
+ first_block = content[:512]
+ if ('<title>Access to this site is blocked</title>' in content
+ and 'Websense' in first_block):
+ msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
+ blocked_iframe = self._html_search_regex(
+ r'<iframe src="([^"]+)"', content,
+ 'Websense information URL', default=None)
+ if blocked_iframe:
+ msg += ' Visit %s for more details' % blocked_iframe
+ raise ExtractorError(msg, expected=True)
+ if '<title>The URL you requested has been blocked</title>' in first_block:
+ msg = (
+ 'Access to this webpage has been blocked by Indian censorship. '
+ 'Use a VPN or proxy server (with --proxy) to route around it.')
+ block_msg = self._html_search_regex(
+ r'</h1><p>(.*?)</p>',
+ content, 'block message', default=None)
+ if block_msg:
+ msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
+ raise ExtractorError(msg, expected=True)
+ if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content
+ and 'blocklist.rkn.gov.ru' in content):
+ raise ExtractorError(
+ 'Access to this webpage has been blocked by decision of the Russian government. '
+ 'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
+ expected=True)
+
+ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+ content_type = urlh.headers.get('Content-Type', '')
+ webpage_bytes = urlh.read()
+ if prefix is not None:
+ webpage_bytes = prefix + webpage_bytes
+ if not encoding:
+ encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
+ if self._downloader.params.get('dump_intermediate_pages', False):
+ self.to_screen('Dumping request to ' + urlh.geturl())
+ dump = base64.b64encode(webpage_bytes).decode('ascii')
+ self._downloader.to_screen(dump)
+ if self._downloader.params.get('write_pages', False):
+ basen = '%s_%s' % (video_id, urlh.geturl())
+ if len(basen) > 240:
+ h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
+ basen = basen[:240 - len(h)] + h
+ raw_filename = basen + '.dump'
+ filename = sanitize_filename(raw_filename, restricted=True)
+ self.to_screen('Saving request to ' + filename)
+ # Working around MAX_PATH limitation on Windows (see
+ # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
+ if compat_os_name == 'nt':
+ absfilepath = os.path.abspath(filename)
+ if len(absfilepath) > 259:
+ filename = '\\\\?\\' + absfilepath
+ with open(filename, 'wb') as outf:
+ outf.write(webpage_bytes)
+
+ try:
+ content = webpage_bytes.decode(encoding, 'replace')
+ except LookupError:
+ content = webpage_bytes.decode('utf-8', 'replace')
+
+ self.__check_blocked(content)
+
+ return content
+
+ def _download_webpage(
+ self, url_or_request, video_id, note=None, errnote=None,
+ fatal=True, tries=1, timeout=5, encoding=None, data=None,
+ headers={}, query={}, expected_status=None):
+ """
+ Return the data of the page as a string.
+
+ Arguments:
+ url_or_request -- plain text URL as a string or
+ a compat_urllib_request.Request object
+ video_id -- Video/playlist/item identifier (string)
+
+ Keyword arguments:
+ note -- note printed before downloading (string)
+ errnote -- note printed in case of an error (string)
+ fatal -- flag denoting whether error should be considered fatal,
+ i.e. whether it should cause ExtractorError to be raised,
+ otherwise a warning will be reported and extraction continued
+ tries -- number of tries
+ timeout -- sleep interval between tries
+ encoding -- encoding for a page content decoding, guessed automatically
+ when not explicitly specified
+ data -- POST data (bytes)
+ headers -- HTTP headers (dict)
+ query -- URL query (dict)
+ expected_status -- allows accepting failed HTTP requests (non-2xx
+ status code) by explicitly specifying a set of accepted status
+ codes. Can be any of the following entities:
+ - an integer type specifying an exact failed status code to
+ accept
+ - a list or a tuple of integer types specifying a list of
+ failed status codes to accept
+ - a callable accepting an actual failed status code and
+ returning True if it should be accepted
+ Note that this argument does not affect success status codes (2xx)
+ which are always accepted.
+ """
+
+ success = False
+ try_count = 0
+ while success is False:
+ try:
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note, errnote, fatal,
+ encoding=encoding, data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ success = True
+ except compat_http_client.IncompleteRead as e:
+ try_count += 1
+ if try_count >= tries:
+ raise e
+ self._sleep(timeout, video_id)
+ if res is False:
+ return res
+ else:
+ content, _ = res
+ return content
+
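+ # A sketch of how expected_status can be used (values are hypothetical):
+ # accept a 404 response and still read its body, e.g. when a site
+ # returns useful JSON together with a non-2xx status code:
+ #
+ # webpage = self._download_webpage(url, video_id, expected_status=404)
+ # webpage = self._download_webpage(
+ # url, video_id, expected_status=lambda x: 400 <= x < 500)
+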
+ def _download_xml_handle(
+ self, url_or_request, video_id, note='Downloading XML',
+ errnote='Unable to download XML', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={},
+ expected_status=None):
+ """
+ Return a tuple (xml as a compat_etree_Element, URL handle).
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note, errnote, fatal=fatal,
+ encoding=encoding, data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ if res is False:
+ return res
+ xml_string, urlh = res
+ return self._parse_xml(
+ xml_string, video_id, transform_source=transform_source,
+ fatal=fatal), urlh
+
+ def _download_xml(
+ self, url_or_request, video_id,
+ note='Downloading XML', errnote='Unable to download XML',
+ transform_source=None, fatal=True, encoding=None,
+ data=None, headers={}, query={}, expected_status=None):
+ """
+ Return the xml as a compat_etree_Element.
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_xml_handle(
+ url_or_request, video_id, note=note, errnote=errnote,
+ transform_source=transform_source, fatal=fatal, encoding=encoding,
+ data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ return res if res is False else res[0]
+
+ def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
+ if transform_source:
+ xml_string = transform_source(xml_string)
+ try:
+ return compat_etree_fromstring(xml_string.encode('utf-8'))
+ except compat_xml_parse_error as ve:
+ errmsg = '%s: Failed to parse XML ' % video_id
+ if fatal:
+ raise ExtractorError(errmsg, cause=ve)
+ else:
+ self.report_warning(errmsg + str(ve))
+
+ def _download_json_handle(
+ self, url_or_request, video_id, note='Downloading JSON metadata',
+ errnote='Unable to download JSON metadata', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={},
+ expected_status=None):
+ """
+ Return a tuple (JSON object, URL handle).
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note, errnote, fatal=fatal,
+ encoding=encoding, data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ if res is False:
+ return res
+ json_string, urlh = res
+ return self._parse_json(
+ json_string, video_id, transform_source=transform_source,
+ fatal=fatal), urlh
+
+ def _download_json(
+ self, url_or_request, video_id, note='Downloading JSON metadata',
+ errnote='Unable to download JSON metadata', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={},
+ expected_status=None):
+ """
+ Return the JSON object as a dict.
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_json_handle(
+ url_or_request, video_id, note=note, errnote=errnote,
+ transform_source=transform_source, fatal=fatal, encoding=encoding,
+ data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ return res if res is False else res[0]
+
+ def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
+ if transform_source:
+ json_string = transform_source(json_string)
+ try:
+ return json.loads(json_string)
+ except ValueError as ve:
+ errmsg = '%s: Failed to parse JSON ' % video_id
+ if fatal:
+ raise ExtractorError(errmsg, cause=ve)
+ else:
+ self.report_warning(errmsg + str(ve))
+
+ def report_warning(self, msg, video_id=None):
+ idstr = '' if video_id is None else '%s: ' % video_id
+ self._downloader.report_warning(
+ '[%s] %s%s' % (self.IE_NAME, idstr, msg))
+
+ def to_screen(self, msg):
+ """Print msg to screen, prefixing it with '[ie_name]'"""
+ self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))
+
+ def report_extraction(self, id_or_name):
+ """Report information extraction."""
+ self.to_screen('%s: Extracting information' % id_or_name)
+
+ def report_download_webpage(self, video_id):
+ """Report webpage download."""
+ self.to_screen('%s: Downloading webpage' % video_id)
+
+ def report_age_confirmation(self):
+ """Report attempt to confirm age."""
+ self.to_screen('Confirming age')
+
+ def report_login(self):
+ """Report attempt to log in."""
+ self.to_screen('Logging in')
+
+ @staticmethod
+ def raise_login_required(msg='This video is only available for registered users'):
+ raise ExtractorError(
+ '%s. Use --username and --password or --netrc to provide account credentials.' % msg,
+ expected=True)
+
+ @staticmethod
+ def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
+ raise GeoRestrictedError(msg, countries=countries)
+
+ # Methods for following #608
+ @staticmethod
+ def url_result(url, ie=None, video_id=None, video_title=None):
+ """Returns a URL that points to a page that should be processed"""
+ # TODO: ie should be the class used for getting the info
+ video_info = {'_type': 'url',
+ 'url': url,
+ 'ie_key': ie}
+ if video_id is not None:
+ video_info['id'] = video_id
+ if video_title is not None:
+ video_info['title'] = video_title
+ return video_info
+
+ def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
+ urls = orderedSet(
+ self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
+ for m in matches)
+ return self.playlist_result(
+ urls, playlist_id=playlist_id, playlist_title=playlist_title)
+
+ @staticmethod
+ def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
+ """Returns a playlist"""
+ video_info = {'_type': 'playlist',
+ 'entries': entries}
+ if playlist_id:
+ video_info['id'] = playlist_id
+ if playlist_title:
+ video_info['title'] = playlist_title
+ if playlist_description:
+ video_info['description'] = playlist_description
+ return video_info
+
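+ # Typical usage from an extractor's _real_extract (illustrative only;
+ # the URL, IDs and variables below are hypothetical):
+ #
+ # return self.url_result('https://example.com/watch/42', ie='Example')
+ #
+ # entries = [self.url_result(u) for u in video_urls]
+ # return self.playlist_result(entries, playlist_id, playlist_title)
+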
+ def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
+ """
+ Perform a regex search on the given string, using a single pattern or
+ a list of patterns, returning the first matching group.
+ In case of failure, return a default value, report a warning, or
+ raise a RegexNotFoundError, depending on fatal, specifying the field
+ name.
+ """
+ if isinstance(pattern, (str, compat_str, compiled_regex_type)):
+ mobj = re.search(pattern, string, flags)
+ else:
+ for p in pattern:
+ mobj = re.search(p, string, flags)
+ if mobj:
+ break
+
+ if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
+ _name = '\033[0;34m%s\033[0m' % name
+ else:
+ _name = name
+
+ if mobj:
+ if group is None:
+ # return the first matching group
+ return next(g for g in mobj.groups() if g is not None)
+ else:
+ return mobj.group(group)
+ elif default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ raise RegexNotFoundError('Unable to extract %s' % _name)
+ else:
+ self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
+ return None
+
+ def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
+ """
+ Like _search_regex, but strips HTML tags and unescapes entities.
+ """
+ res = self._search_regex(pattern, string, name, default, fatal, flags, group)
+ if res:
+ return clean_html(res).strip()
+ else:
+ return res
+
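+ # For illustration (hypothetical pattern and page contents):
+ #
+ # title = self._html_search_regex(
+ # r'<h1[^>]*>(.+?)</h1>', webpage, 'title', default=None)
+ #
+ # With default=None a failed match returns None instead of raising
+ # RegexNotFoundError.
+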
+ def _get_netrc_login_info(self, netrc_machine=None):
+ username = None
+ password = None
+ netrc_machine = netrc_machine or self._NETRC_MACHINE
+
+ if self._downloader.params.get('usenetrc', False):
+ try:
+ info = netrc.netrc().authenticators(netrc_machine)
+ if info is not None:
+ username = info[0]
+ password = info[2]
+ else:
+ raise netrc.NetrcParseError(
+ 'No authenticators for %s' % netrc_machine)
+ except (IOError, netrc.NetrcParseError) as err:
+ self._downloader.report_warning(
+ 'parsing .netrc: %s' % error_to_compat_str(err))
+
+ return username, password
+
+ def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
+ """
+ Get the login info as (username, password).
+ First look for manually specified credentials using username_option
+ and password_option as keys in the params dictionary. If no such
+ credentials are available, look in the netrc file using the
+ netrc_machine or _NETRC_MACHINE value.
+ If there's no info available, return (None, None).
+ """
+ if self._downloader is None:
+ return (None, None)
+
+ downloader_params = self._downloader.params
+
+ # Attempt to use provided username and password or .netrc data
+ if downloader_params.get(username_option) is not None:
+ username = downloader_params[username_option]
+ password = downloader_params[password_option]
+ else:
+ username, password = self._get_netrc_login_info(netrc_machine)
+
+ return username, password
+
+ def _get_tfa_info(self, note='two-factor verification code'):
+ """
+ Get the two-factor authentication info.
+ TODO - asking the user will be required for sms/phone verification;
+ currently this just uses the command line option.
+ If there's no info available, return None.
+ """
+ if self._downloader is None:
+ return None
+ downloader_params = self._downloader.params
+
+ if downloader_params.get('twofactor') is not None:
+ return downloader_params['twofactor']
+
+ return compat_getpass('Type %s and press [Return]: ' % note)
+
+ # Helper functions for extracting OpenGraph info
+ @staticmethod
+ def _og_regexes(prop):
+ content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
+ property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
+ % {'prop': re.escape(prop)})
+ template = r'<meta[^>]+?%s[^>]+?%s'
+ return [
+ template % (property_re, content_re),
+ template % (content_re, property_re),
+ ]
+
+ @staticmethod
+ def _meta_regex(prop):
+ return r'''(?isx)<meta
+ (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1)
+ [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
+
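+ # As a rough illustration, _og_regexes('title') yields patterns matching
+ # meta tags such as (attribute order may also be reversed):
+ #
+ # <meta property="og:title" content="Some title">
+ #
+ # while _meta_regex('description') matches e.g.
+ #
+ # <meta name="description" content="Some description">
+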
+ def _og_search_property(self, prop, html, name=None, **kargs):
+ if not isinstance(prop, (list, tuple)):
+ prop = [prop]
+ if name is None:
+ name = 'OpenGraph %s' % prop[0]
+ og_regexes = []
+ for p in prop:
+ og_regexes.extend(self._og_regexes(p))
+ escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs)
+ if escaped is None:
+ return None
+ return unescapeHTML(escaped)
+
+ def _og_search_thumbnail(self, html, **kargs):
+ return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs)
+
+ def _og_search_description(self, html, **kargs):
+ return self._og_search_property('description', html, fatal=False, **kargs)
+
+ def _og_search_title(self, html, **kargs):
+ return self._og_search_property('title', html, **kargs)
+
+ def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
+ regexes = self._og_regexes('video') + self._og_regexes('video:url')
+ if secure:
+ regexes = self._og_regexes('video:secure_url') + regexes
+ return self._html_search_regex(regexes, html, name, **kargs)
+
+ def _og_search_url(self, html, **kargs):
+ return self._og_search_property('url', html, **kargs)
+
+ def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
+ if not isinstance(name, (list, tuple)):
+ name = [name]
+ if display_name is None:
+ display_name = name[0]
+ return self._html_search_regex(
+ [self._meta_regex(n) for n in name],
+ html, display_name, fatal=fatal, group='content', **kwargs)
+
+ def _dc_search_uploader(self, html):
+ return self._html_search_meta('dc.creator', html, 'uploader')
+
+ def _rta_search(self, html):
+ # See http://www.rtalabel.org/index.php?content=howtofaq#single
+ if re.search(r'(?ix)<meta\s+name="rating"\s+'
+ r' content="RTA-5042-1996-1400-1577-RTA"',
+ html):
+ return 18
+ return 0
+
+ def _media_rating_search(self, html):
+ # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
+ rating = self._html_search_meta('rating', html)
+
+ if not rating:
+ return None
+
+ RATING_TABLE = {
+ 'safe for kids': 0,
+ 'general': 8,
+ '14 years': 14,
+ 'mature': 17,
+ 'restricted': 19,
+ }
+ return RATING_TABLE.get(rating.lower())
+
+ def _family_friendly_search(self, html):
+ # See http://schema.org/VideoObject
+ family_friendly = self._html_search_meta(
+ 'isFamilyFriendly', html, default=None)
+
+ if not family_friendly:
+ return None
+
+ RATING_TABLE = {
+ '1': 0,
+ 'true': 0,
+ '0': 18,
+ 'false': 18,
+ }
+ return RATING_TABLE.get(family_friendly.lower())
+
+ def _twitter_search_player(self, html):
+ return self._html_search_meta('twitter:player', html,
+ 'twitter card player')
+
+ def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
+ json_ld_list = list(re.finditer(JSON_LD_RE, html))
+ default = kwargs.get('default', NO_DEFAULT)
+ # JSON-LD may be malformed and thus `fatal` should be respected.
+ # At the same time `default` may be passed that assumes `fatal=False`
+ # for _search_regex. Let's simulate the same behavior here as well.
+ fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
+ json_ld = []
+ for mobj in json_ld_list:
+ json_ld_item = self._parse_json(
+ mobj.group('json_ld'), video_id, fatal=fatal)
+ if not json_ld_item:
+ continue
+ if isinstance(json_ld_item, dict):
+ json_ld.append(json_ld_item)
+ elif isinstance(json_ld_item, (list, tuple)):
+ json_ld.extend(json_ld_item)
+ if json_ld:
+ json_ld = self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
+ if json_ld:
+ return json_ld
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ raise RegexNotFoundError('Unable to extract JSON-LD')
+ else:
+ self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
+ return {}
+
+ def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
+ if isinstance(json_ld, compat_str):
+ json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
+ if not json_ld:
+ return {}
+ info = {}
+ if not isinstance(json_ld, (list, tuple, dict)):
+ return info
+ if isinstance(json_ld, dict):
+ json_ld = [json_ld]
+
+ INTERACTION_TYPE_MAP = {
+ 'CommentAction': 'comment',
+ 'AgreeAction': 'like',
+ 'DisagreeAction': 'dislike',
+ 'LikeAction': 'like',
+ 'DislikeAction': 'dislike',
+ 'ListenAction': 'view',
+ 'WatchAction': 'view',
+ 'ViewAction': 'view',
+ }
+
+ def extract_interaction_type(e):
+ interaction_type = e.get('interactionType')
+ if isinstance(interaction_type, dict):
+ interaction_type = interaction_type.get('@type')
+ return str_or_none(interaction_type)
+
+ def extract_interaction_statistic(e):
+ interaction_statistic = e.get('interactionStatistic')
+ if isinstance(interaction_statistic, dict):
+ interaction_statistic = [interaction_statistic]
+ if not isinstance(interaction_statistic, list):
+ return
+ for is_e in interaction_statistic:
+ if not isinstance(is_e, dict):
+ continue
+ if is_e.get('@type') != 'InteractionCounter':
+ continue
+ interaction_type = extract_interaction_type(is_e)
+ if not interaction_type:
+ continue
+ # Some sites provide the interaction count as a string (the spec
+ # requires an integer) with non-digit characters (e.g. ","), so
+ # extract the count with the more relaxed str_to_int
+ interaction_count = str_to_int(is_e.get('userInteractionCount'))
+ if interaction_count is None:
+ continue
+ count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
+ if not count_kind:
+ continue
+ count_key = '%s_count' % count_kind
+ if info.get(count_key) is not None:
+ continue
+ info[count_key] = interaction_count
+
+ def extract_video_object(e):
+ assert e['@type'] == 'VideoObject'
+ author = e.get('author')
+ info.update({
+ 'url': url_or_none(e.get('contentUrl')),
+ 'title': unescapeHTML(e.get('name')),
+ 'description': unescapeHTML(e.get('description')),
+ 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
+ 'duration': parse_duration(e.get('duration')),
+ 'timestamp': unified_timestamp(e.get('uploadDate')),
+ # author can be an instance of the 'Organization' or 'Person' types;
+ # both types can have a 'name' property (inherited from the 'Thing'
+ # type) [1]. However, some websites use the 'Text' type instead.
+ # 1. https://schema.org/VideoObject
+ 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
+ 'filesize': float_or_none(e.get('contentSize')),
+ 'tbr': int_or_none(e.get('bitrate')),
+ 'width': int_or_none(e.get('width')),
+ 'height': int_or_none(e.get('height')),
+ 'view_count': int_or_none(e.get('interactionCount')),
+ })
+ extract_interaction_statistic(e)
+
+ for e in json_ld:
+ if '@context' in e:
+ item_type = e.get('@type')
+ if expected_type is not None and expected_type != item_type:
+ continue
+ if item_type in ('TVEpisode', 'Episode'):
+ episode_name = unescapeHTML(e.get('name'))
+ info.update({
+ 'episode': episode_name,
+ 'episode_number': int_or_none(e.get('episodeNumber')),
+ 'description': unescapeHTML(e.get('description')),
+ })
+ if not info.get('title') and episode_name:
+ info['title'] = episode_name
+ part_of_season = e.get('partOfSeason')
+ if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
+ info.update({
+ 'season': unescapeHTML(part_of_season.get('name')),
+ 'season_number': int_or_none(part_of_season.get('seasonNumber')),
+ })
+ part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
+ if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
+ info['series'] = unescapeHTML(part_of_series.get('name'))
+ elif item_type == 'Movie':
+ info.update({
+ 'title': unescapeHTML(e.get('name')),
+ 'description': unescapeHTML(e.get('description')),
+ 'duration': parse_duration(e.get('duration')),
+ 'timestamp': unified_timestamp(e.get('dateCreated')),
+ })
+ elif item_type in ('Article', 'NewsArticle'):
+ info.update({
+ 'timestamp': parse_iso8601(e.get('datePublished')),
+ 'title': unescapeHTML(e.get('headline')),
+ 'description': unescapeHTML(e.get('articleBody')),
+ })
+ elif item_type == 'VideoObject':
+ extract_video_object(e)
+ if expected_type is None:
+ continue
+ else:
+ break
+ video = e.get('video')
+ if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+ extract_video_object(video)
+ if expected_type is None:
+ continue
+ else:
+ break
+ return dict((k, v) for k, v in info.items() if v is not None)
+
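+ # A sketch of the markup _search_json_ld handles (hypothetical page):
+ #
+ # <script type="application/ld+json">
+ # {"@context": "https://schema.org", "@type": "VideoObject",
+ # "name": "Some video", "uploadDate": "2020-01-01",
+ # "duration": "PT1M30S"}
+ # </script>
+ #
+ # would yield {'title': 'Some video', 'timestamp': 1577836800,
+ # 'duration': 90.0} via extract_video_object above.
+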
+ @staticmethod
+ def _hidden_inputs(html):
+ html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
+ hidden_inputs = {}
+ for input in re.findall(r'(?i)(<input[^>]+>)', html):
+ attrs = extract_attributes(input)
+ # skip inputs whose attributes could not be parsed
+ if not attrs:
+ continue
+ if attrs.get('type') not in ('hidden', 'submit'):
+ continue
+ name = attrs.get('name') or attrs.get('id')
+ value = attrs.get('value')
+ if name and value is not None:
+ hidden_inputs[name] = value
+ return hidden_inputs
+
+ def _form_hidden_inputs(self, form_id, html):
+ form = self._search_regex(
+ r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
+ html, '%s form' % form_id, group='form')
+ return self._hidden_inputs(form)
+
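+ # For illustration (hypothetical form): given
+ #
+ # <form id="login"><input type="hidden" name="token" value="abc123">
+ # <input type="text" name="user"></form>
+ #
+ # _form_hidden_inputs('login', html) would return {'token': 'abc123'};
+ # the non-hidden "user" input is skipped.
+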
+ def _sort_formats(self, formats, field_preference=None):
+ if not formats:
+ raise ExtractorError('No video formats found')
+
+ for f in formats:
+ # Automatically determine tbr when missing based on abr and vbr (improves
+ # formats sorting in some cases)
+ if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None:
+ f['tbr'] = f['abr'] + f['vbr']
+
+ def _formats_key(f):
+ # TODO remove the following workaround
+ from ..utils import determine_ext
+ if not f.get('ext') and 'url' in f:
+ f['ext'] = determine_ext(f['url'])
+
+ if isinstance(field_preference, (list, tuple)):
+ return tuple(
+ f.get(field)
+ if f.get(field) is not None
+ else ('' if field == 'format_id' else -1)
+ for field in field_preference)
+
+ preference = f.get('preference')
+ if preference is None:
+ preference = 0
+ if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
+ preference -= 0.5
+
+ protocol = f.get('protocol') or determine_protocol(f)
+ proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1)
+
+ if f.get('vcodec') == 'none': # audio only
+ preference -= 50
+ if self._downloader.params.get('prefer_free_formats'):
+ ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
+ else:
+ ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
+ ext_preference = 0
+ try:
+ audio_ext_preference = ORDER.index(f['ext'])
+ except ValueError:
+ audio_ext_preference = -1
+ else:
+ if f.get('acodec') == 'none': # video only
+ preference -= 40
+ if self._downloader.params.get('prefer_free_formats'):
+ ORDER = ['flv', 'mp4', 'webm']
+ else:
+ ORDER = ['webm', 'flv', 'mp4']
+ try:
+ ext_preference = ORDER.index(f['ext'])
+ except ValueError:
+ ext_preference = -1
+ audio_ext_preference = 0
+
+ return (
+ preference,
+ f.get('language_preference') if f.get('language_preference') is not None else -1,
+ f.get('quality') if f.get('quality') is not None else -1,
+ f.get('tbr') if f.get('tbr') is not None else -1,
+ f.get('filesize') if f.get('filesize') is not None else -1,
+ f.get('vbr') if f.get('vbr') is not None else -1,
+ f.get('height') if f.get('height') is not None else -1,
+ f.get('width') if f.get('width') is not None else -1,
+ proto_preference,
+ ext_preference,
+ f.get('abr') if f.get('abr') is not None else -1,
+ audio_ext_preference,
+ f.get('fps') if f.get('fps') is not None else -1,
+ f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
+ f.get('source_preference') if f.get('source_preference') is not None else -1,
+ f.get('format_id') if f.get('format_id') is not None else '',
+ )
+ formats.sort(key=_formats_key)
+
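+ # Extractors typically build up a formats list and then call, e.g.:
+ #
+ # self._sort_formats(formats) # sorts from worst to best, in place
+ #
+ # With field_preference, e.g. ('height', 'tbr'), the formats are
+ # ordered by those fields only.
+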
+ def _check_formats(self, formats, video_id):
+ if formats:
+ formats[:] = filter(
+ lambda f: self._is_valid_url(
+ f['url'], video_id,
+ item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
+ formats)
+
+ @staticmethod
+ def _remove_duplicate_formats(formats):
+ format_urls = set()
+ unique_formats = []
+ for f in formats:
+ if f['url'] not in format_urls:
+ format_urls.add(f['url'])
+ unique_formats.append(f)
+ formats[:] = unique_formats
+
+ def _is_valid_url(self, url, video_id, item='video', headers={}):
+ url = self._proto_relative_url(url, scheme='http:')
+ # For now assume non-HTTP(S) URLs are always valid
+ if not (url.startswith('http://') or url.startswith('https://')):
+ return True
+ try:
+ self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
+ return True
+ except ExtractorError as e:
+ self.to_screen(
+ '%s: %s URL is invalid, skipping: %s'
+ % (video_id, item, error_to_compat_str(e.cause)))
+ return False
+
+ def http_scheme(self):
+ """ Either "http:" or "https:", depending on the user's preferences """
+ return (
+ 'http:'
+ if self._downloader.params.get('prefer_insecure', False)
+ else 'https:')
+
+ def _proto_relative_url(self, url, scheme=None):
+ if url is None:
+ return url
+ if url.startswith('//'):
+ if scheme is None:
+ scheme = self.http_scheme()
+ return scheme + url
+ else:
+ return url
+
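+ # For illustration: with default settings,
+ # _proto_relative_url('//example.com/v.mp4') returns
+ # 'https://example.com/v.mp4' ('http://...' if the prefer_insecure
+ # option is set).
+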
+ def _sleep(self, timeout, video_id, msg_template=None):
+ if msg_template is None:
+ msg_template = '%(video_id)s: Waiting for %(timeout)s seconds'
+ msg = msg_template % {'video_id': video_id, 'timeout': timeout}
+ self.to_screen(msg)
+ time.sleep(timeout)
+
+ def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
+ transform_source=lambda s: fix_xml_ampersands(s).strip(),
+ fatal=True, m3u8_id=None, data=None, headers={}, query={}):
+ manifest = self._download_xml(
+ manifest_url, video_id, 'Downloading f4m manifest',
+ 'Unable to download f4m manifest',
+ # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+ # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
+ transform_source=transform_source,
+ fatal=fatal, data=data, headers=headers, query=query)
+
+ if manifest is False:
+ return []
+
+ return self._parse_f4m_formats(
+ manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
+ transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
+
+ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
+ transform_source=lambda s: fix_xml_ampersands(s).strip(),
+ fatal=True, m3u8_id=None):
+ if not isinstance(manifest, compat_etree_Element) and not fatal:
+ return []
+
+ # currently hypervideo cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
+ akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
+ if akamai_pv is not None and ';' in akamai_pv.text:
+ playerVerificationChallenge = akamai_pv.text.split(';')[0]
+ if playerVerificationChallenge.strip() != '':
+ return []
+
+ formats = []
+ manifest_version = '1.0'
+ media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
+ if not media_nodes:
+ manifest_version = '2.0'
+ media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
+ # Remove unsupported DRM protected media from final formats
+ # rendition (see https://github.com/ytdl-org/youtube-dl/issues/8573).
+ media_nodes = remove_encrypted_media(media_nodes)
+ if not media_nodes:
+ return formats
+
+ manifest_base_url = get_base_url(manifest)
+
+ bootstrap_info = xpath_element(
+ manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
+ 'bootstrap info', default=None)
+
+ vcodec = None
+ mime_type = xpath_text(
+ manifest, ['{http://ns.adobe.com/f4m/1.0}mimeType', '{http://ns.adobe.com/f4m/2.0}mimeType'],
+ 'mime type', default=None)
+ if mime_type and mime_type.startswith('audio/'):
+ vcodec = 'none'
+
+ for i, media_el in enumerate(media_nodes):
+ tbr = int_or_none(media_el.attrib.get('bitrate'))
+ width = int_or_none(media_el.attrib.get('width'))
+ height = int_or_none(media_el.attrib.get('height'))
+ format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)]))
+ # If <bootstrapInfo> is present, the specified f4m is a
+ # stream-level manifest, and only set-level manifests may refer to
+ # external resources. See section 11.4 and section 4 of F4M spec
+ if bootstrap_info is None:
+ media_url = None
+ # @href is introduced in 2.0, see section 11.6 of F4M spec
+ if manifest_version == '2.0':
+ media_url = media_el.attrib.get('href')
+ if media_url is None:
+ media_url = media_el.attrib.get('url')
+ if not media_url:
+ continue
+ manifest_url = (
+ media_url if media_url.startswith('http://') or media_url.startswith('https://')
+ else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
+ # If media_url is itself an f4m manifest, do the recursive extraction,
+ # since bitrates in the parent manifest (this one) and the media_url
+ # manifest may differ, leading to an inability to resolve the format
+ # by the requested bitrate in the f4m downloader
+ ext = determine_ext(manifest_url)
+ if ext == 'f4m':
+ f4m_formats = self._extract_f4m_formats(
+ manifest_url, video_id, preference=preference, f4m_id=f4m_id,
+ transform_source=transform_source, fatal=fatal)
+ # Sometimes a stream-level manifest contains a single media entry that
+ # does not contain any quality metadata (e.g. http://matchtv.ru/#live-player).
+ # At the same time the parent's media entry in the set-level manifest may
+ # contain it. We will copy it from the parent in such cases.
+ if len(f4m_formats) == 1:
+ f = f4m_formats[0]
+ f.update({
+ 'tbr': f.get('tbr') or tbr,
+ 'width': f.get('width') or width,
+ 'height': f.get('height') or height,
+ 'format_id': f.get('format_id') if not tbr else format_id,
+ 'vcodec': vcodec,
+ })
+ formats.extend(f4m_formats)
+ continue
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4', preference=preference,
+ m3u8_id=m3u8_id, fatal=fatal))
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': manifest_url,
+ 'manifest_url': manifest_url,
+ 'ext': 'flv' if bootstrap_info is not None else None,
+ 'protocol': 'f4m',
+ 'tbr': tbr,
+ 'width': width,
+ 'height': height,
+ 'vcodec': vcodec,
+ 'preference': preference,
+ })
+ return formats
+
+ def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None):
+ return {
+ 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
+ 'url': m3u8_url,
+ 'ext': ext,
+ 'protocol': 'm3u8',
+ 'preference': preference - 100 if preference else -100,
+ 'resolution': 'multiple',
+ 'format_note': 'Quality selection URL',
+ }
+
+ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
+ entry_protocol='m3u8', preference=None,
+ m3u8_id=None, note=None, errnote=None,
+ fatal=True, live=False, data=None, headers={},
+ query={}):
+ res = self._download_webpage_handle(
+ m3u8_url, video_id,
+ note=note or 'Downloading m3u8 information',
+ errnote=errnote or 'Failed to download m3u8 information',
+ fatal=fatal, data=data, headers=headers, query=query)
+
+ if res is False:
+ return []
+
+ m3u8_doc, urlh = res
+ m3u8_url = urlh.geturl()
+
+ return self._parse_m3u8_formats(
+ m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
+ preference=preference, m3u8_id=m3u8_id, live=live)
+
+ def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
+ entry_protocol='m3u8', preference=None,
+ m3u8_id=None, live=False):
+ if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
+ return []
+
+ if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay
+ return []
+
+ formats = []
+
+ format_url = lambda u: (
+ u
+ if re.match(r'^https?://', u)
+ else compat_urlparse.urljoin(m3u8_url, u))
+
+ # References:
+ # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
+ # 2. https://github.com/ytdl-org/youtube-dl/issues/12211
+ # 3. https://github.com/ytdl-org/youtube-dl/issues/18923
+
+        # We should try extracting formats only from master playlists [1, 4.3.4],
+        # i.e. playlists that describe the available qualities. Media playlists
+        # [1, 4.3.3], on the other hand, should be returned as is since they
+        # contain just the media without quality renditions.
+        # Fortunately, a master playlist can easily be distinguished from a media
+        # playlist by the availability of particular tags. As per [1, 4.3.3, 4.3.4]
+        # master playlist tags MUST NOT appear in a media playlist and vice versa.
+        # As per [1, 4.3.3.1] the #EXT-X-TARGETDURATION tag is REQUIRED for every
+        # media playlist and MUST NOT appear in a master playlist, so we can
+        # reliably detect a media playlist with this criterion.
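+        # For example, a minimal media playlist (illustrative snippet, not taken
+        # from any real stream) would look like:
+        #   #EXTM3U
+        #   #EXT-X-TARGETDURATION:10
+        #   #EXTINF:9.009,
+        #   http://example.com/first.ts
+        # whereas a master playlist carries #EXT-X-STREAM-INF entries instead.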
+
+ if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
+ return [{
+ 'url': m3u8_url,
+ 'format_id': m3u8_id,
+ 'ext': ext,
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ }]
+
+ groups = {}
+ last_stream_inf = {}
+
+ def extract_media(x_media_line):
+ media = parse_m3u8_attributes(x_media_line)
+ # As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
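+            # e.g. (illustrative): #EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud1",
+            #   NAME="English",LANGUAGE="en",URI="http://example.com/audio/en.m3u8"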
+ media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
+ if not (media_type and group_id and name):
+ return
+ groups.setdefault(group_id, []).append(media)
+ if media_type not in ('VIDEO', 'AUDIO'):
+ return
+ media_url = media.get('URI')
+ if media_url:
+ format_id = []
+ for v in (m3u8_id, group_id, name):
+ if v:
+ format_id.append(v)
+ f = {
+ 'format_id': '-'.join(format_id),
+ 'url': format_url(media_url),
+ 'manifest_url': m3u8_url,
+ 'language': media.get('LANGUAGE'),
+ 'ext': ext,
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ }
+ if media_type == 'AUDIO':
+ f['vcodec'] = 'none'
+ formats.append(f)
+
+ def build_stream_name():
+            # Although the specification does not mention a NAME attribute for
+            # the EXT-X-STREAM-INF tag, it may still sometimes be present (see [1]
+            # or the vidio test in TestInfoExtractor.test_parse_m3u8_formats)
+            # 1. http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
+ stream_name = last_stream_inf.get('NAME')
+ if stream_name:
+ return stream_name
+ # If there is no NAME in EXT-X-STREAM-INF it will be obtained
+            # from the corresponding rendition group
+ stream_group_id = last_stream_inf.get('VIDEO')
+ if not stream_group_id:
+ return
+ stream_group = groups.get(stream_group_id)
+ if not stream_group:
+ return stream_group_id
+ rendition = stream_group[0]
+ return rendition.get('NAME') or stream_group_id
+
+        # Parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have a
+        # chance to detect video-only formats when EXT-X-STREAM-INF tags
+        # precede EXT-X-MEDIA tags in the HLS manifest, as in [3].
+ for line in m3u8_doc.splitlines():
+ if line.startswith('#EXT-X-MEDIA:'):
+ extract_media(line)
+
+ for line in m3u8_doc.splitlines():
+ if line.startswith('#EXT-X-STREAM-INF:'):
+ last_stream_inf = parse_m3u8_attributes(line)
+ elif line.startswith('#') or not line.strip():
+ continue
+ else:
+ tbr = float_or_none(
+ last_stream_inf.get('AVERAGE-BANDWIDTH')
+ or last_stream_inf.get('BANDWIDTH'), scale=1000)
+ format_id = []
+ if m3u8_id:
+ format_id.append(m3u8_id)
+ stream_name = build_stream_name()
+                    # The bandwidth of live streams may vary over time, making
+                    # format_id unpredictable, so it's better to keep the provided
+                    # format_id intact.
+ if not live:
+ format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
+ manifest_url = format_url(line.strip())
+ f = {
+ 'format_id': '-'.join(format_id),
+ 'url': manifest_url,
+ 'manifest_url': m3u8_url,
+ 'tbr': tbr,
+ 'ext': ext,
+ 'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ }
+ resolution = last_stream_inf.get('RESOLUTION')
+ if resolution:
+ mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
+ if mobj:
+ f['width'] = int(mobj.group('width'))
+ f['height'] = int(mobj.group('height'))
+ # Unified Streaming Platform
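+                    # e.g. (illustrative) a USP URL chunk like
+                    # '...-audio_eng=128000-video=1500000.m3u8' yields abr=128,
+                    # vbr=1500 below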
+ mobj = re.search(
+ r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
+ if mobj:
+ abr, vbr = mobj.groups()
+ abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
+ f.update({
+ 'vbr': vbr,
+ 'abr': abr,
+ })
+ codecs = parse_codecs(last_stream_inf.get('CODECS'))
+ f.update(codecs)
+ audio_group_id = last_stream_inf.get('AUDIO')
+ # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
+ # references a rendition group MUST have a CODECS attribute.
+ # However, this is not always respected, for example, [2]
+ # contains EXT-X-STREAM-INF tag which references AUDIO
+ # rendition group but does not have CODECS and despite
+ # referencing an audio group it represents a complete
+ # (with audio and video) format. So, for such cases we will
+ # ignore references to rendition groups and treat them
+ # as complete formats.
+ if audio_group_id and codecs and f.get('vcodec') != 'none':
+ audio_group = groups.get(audio_group_id)
+ if audio_group and audio_group[0].get('URI'):
+ # TODO: update acodec for audio only formats with
+ # the same GROUP-ID
+ f['acodec'] = 'none'
+ formats.append(f)
+
+ # for DailyMotion
+ progressive_uri = last_stream_inf.get('PROGRESSIVE-URI')
+ if progressive_uri:
+ http_f = f.copy()
+ del http_f['manifest_url']
+ http_f.update({
+ 'format_id': f['format_id'].replace('hls-', 'http-'),
+ 'protocol': 'http',
+ 'url': progressive_uri,
+ })
+ formats.append(http_f)
+
+ last_stream_inf = {}
+ return formats
+
+ @staticmethod
+ def _xpath_ns(path, namespace=None):
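+        # e.g. (illustrative): _xpath_ns('./head/meta', 'http://www.w3.org/ns/SMIL')
+        # -> './{http://www.w3.org/ns/SMIL}head/{http://www.w3.org/ns/SMIL}meta'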
+ if not namespace:
+ return path
+ out = []
+ for c in path.split('/'):
+ if not c or c == '.':
+ out.append(c)
+ else:
+ out.append('{%s}%s' % (namespace, c))
+ return '/'.join(out)
+
+ def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
+ smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
+
+ if smil is False:
+ assert not fatal
+ return []
+
+ namespace = self._parse_smil_namespace(smil)
+
+ return self._parse_smil_formats(
+ smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
+
+ def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
+ smil = self._download_smil(smil_url, video_id, fatal=fatal)
+ if smil is False:
+ return {}
+ return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
+
+ def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
+ return self._download_xml(
+ smil_url, video_id, 'Downloading SMIL file',
+ 'Unable to download SMIL file', fatal=fatal, transform_source=transform_source)
+
+ def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
+ namespace = self._parse_smil_namespace(smil)
+
+ formats = self._parse_smil_formats(
+ smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
+ subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
+
+ video_id = os.path.splitext(url_basename(smil_url))[0]
+ title = None
+ description = None
+ upload_date = None
+ for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
+ name = meta.attrib.get('name')
+ content = meta.attrib.get('content')
+ if not name or not content:
+ continue
+ if not title and name == 'title':
+ title = content
+ elif not description and name in ('description', 'abstract'):
+ description = content
+ elif not upload_date and name == 'date':
+ upload_date = unified_strdate(content)
+
+ thumbnails = [{
+ 'id': image.get('type'),
+ 'url': image.get('src'),
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ } for image in smil.findall(self._xpath_ns('.//image', namespace)) if image.get('src')]
+
+ return {
+ 'id': video_id,
+ 'title': title or video_id,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _parse_smil_namespace(self, smil):
+ return self._search_regex(
+ r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
+
+ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
+ base = smil_url
+ for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
+ b = meta.get('base') or meta.get('httpBase')
+ if b:
+ base = b
+ break
+
+ formats = []
+ rtmp_count = 0
+ http_count = 0
+ m3u8_count = 0
+
+ srcs = []
+ media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
+ for medium in media:
+ src = medium.get('src')
+ if not src or src in srcs:
+ continue
+ srcs.append(src)
+
+ bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
+ filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
+ width = int_or_none(medium.get('width'))
+ height = int_or_none(medium.get('height'))
+ proto = medium.get('proto')
+ ext = medium.get('ext')
+ src_ext = determine_ext(src)
+ streamer = medium.get('streamer') or base
+
+ if proto == 'rtmp' or streamer.startswith('rtmp'):
+ rtmp_count += 1
+ formats.append({
+ 'url': streamer,
+ 'play_path': src,
+ 'ext': 'flv',
+ 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+ 'tbr': bitrate,
+ 'filesize': filesize,
+ 'width': width,
+ 'height': height,
+ })
+ if transform_rtmp_url:
+ streamer, src = transform_rtmp_url(streamer, src)
+ formats[-1].update({
+ 'url': streamer,
+ 'play_path': src,
+ })
+ continue
+
+ src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
+ src_url = src_url.strip()
+
+ if proto == 'm3u8' or src_ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
+ if len(m3u8_formats) == 1:
+ m3u8_count += 1
+ m3u8_formats[0].update({
+ 'format_id': 'hls-%d' % (m3u8_count if bitrate is None else bitrate),
+ 'tbr': bitrate,
+ 'width': width,
+ 'height': height,
+ })
+ formats.extend(m3u8_formats)
+ elif src_ext == 'f4m':
+ f4m_url = src_url
+ if not f4m_params:
+ f4m_params = {
+ 'hdcore': '3.2.0',
+ 'plugin': 'flowplayer-3.2.0.1',
+ }
+ f4m_url += '&' if '?' in f4m_url else '?'
+ f4m_url += compat_urllib_parse_urlencode(f4m_params)
+ formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
+ elif src_ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ src_url, video_id, mpd_id='dash', fatal=False))
+ elif re.search(r'\.ism/[Mm]anifest', src_url):
+ formats.extend(self._extract_ism_formats(
+ src_url, video_id, ism_id='mss', fatal=False))
+ elif src_url.startswith('http') and self._is_valid_url(src, video_id):
+ http_count += 1
+ formats.append({
+ 'url': src_url,
+ 'ext': ext or src_ext or 'flv',
+ 'format_id': 'http-%d' % (bitrate or http_count),
+ 'tbr': bitrate,
+ 'filesize': filesize,
+ 'width': width,
+ 'height': height,
+ })
+
+ return formats
+
+ def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
+ urls = []
+ subtitles = {}
+ for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
+ src = textstream.get('src')
+ if not src or src in urls:
+ continue
+ urls.append(src)
+ ext = textstream.get('ext') or mimetype2ext(textstream.get('type')) or determine_ext(src)
+ lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
+ subtitles.setdefault(lang, []).append({
+ 'url': src,
+ 'ext': ext,
+ })
+ return subtitles
+
+ def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
+ xspf = self._download_xml(
+            xspf_url, playlist_id, 'Downloading xspf playlist',
+ 'Unable to download xspf manifest', fatal=fatal)
+ if xspf is False:
+ return []
+ return self._parse_xspf(
+ xspf, playlist_id, xspf_url=xspf_url,
+ xspf_base_url=base_url(xspf_url))
+
+ def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
+ NS_MAP = {
+ 'xspf': 'http://xspf.org/ns/0/',
+ 's1': 'http://static.streamone.nl/player/ns/0',
+ }
+
+ entries = []
+ for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
+ title = xpath_text(
+ track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
+ description = xpath_text(
+ track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
+ thumbnail = xpath_text(
+ track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
+ duration = float_or_none(
+ xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
+
+ formats = []
+ for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
+ format_url = urljoin(xspf_base_url, location.text)
+ if not format_url:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'manifest_url': xspf_url,
+ 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
+ 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
+ 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
+ })
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': playlist_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ })
+ return entries
+
+ def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+ res = self._download_xml_handle(
+ mpd_url, video_id,
+ note=note or 'Downloading MPD manifest',
+ errnote=errnote or 'Failed to download MPD manifest',
+ fatal=fatal, data=data, headers=headers, query=query)
+ if res is False:
+ return []
+ mpd_doc, urlh = res
+ if mpd_doc is None:
+ return []
+ mpd_base_url = base_url(urlh.geturl())
+
+ return self._parse_mpd_formats(
+ mpd_doc, mpd_id, mpd_base_url, mpd_url)
+
+ def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
+ """
+ Parse formats from MPD manifest.
+ References:
+ 1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
+ http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
+ 2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
+ """
+ if mpd_doc.get('type') == 'dynamic':
+ return []
+
+ namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
+
+ def _add_ns(path):
+ return self._xpath_ns(path, namespace)
+
+ def is_drm_protected(element):
+ return element.find(_add_ns('ContentProtection')) is not None
+
+ def extract_multisegment_info(element, ms_parent_info):
+ ms_info = ms_parent_info.copy()
+
+            # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
+            # common attributes and elements. We only extract what is relevant
+            # for us.
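+            # e.g. (illustrative) a SegmentTimeline of <S t="0" d="90000" r="2"/>
+            # with timescale 90000 expands to three one-second segments below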
+ def extract_common(source):
+ segment_timeline = source.find(_add_ns('SegmentTimeline'))
+ if segment_timeline is not None:
+ s_e = segment_timeline.findall(_add_ns('S'))
+ if s_e:
+ ms_info['total_number'] = 0
+ ms_info['s'] = []
+ for s in s_e:
+ r = int(s.get('r', 0))
+ ms_info['total_number'] += 1 + r
+ ms_info['s'].append({
+ 't': int(s.get('t', 0)),
+ # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
+ 'd': int(s.attrib['d']),
+ 'r': r,
+ })
+ start_number = source.get('startNumber')
+ if start_number:
+ ms_info['start_number'] = int(start_number)
+ timescale = source.get('timescale')
+ if timescale:
+ ms_info['timescale'] = int(timescale)
+ segment_duration = source.get('duration')
+ if segment_duration:
+ ms_info['segment_duration'] = float(segment_duration)
+
+ def extract_Initialization(source):
+ initialization = source.find(_add_ns('Initialization'))
+ if initialization is not None:
+ ms_info['initialization_url'] = initialization.attrib['sourceURL']
+
+ segment_list = element.find(_add_ns('SegmentList'))
+ if segment_list is not None:
+ extract_common(segment_list)
+ extract_Initialization(segment_list)
+ segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
+ if segment_urls_e:
+ ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
+ else:
+ segment_template = element.find(_add_ns('SegmentTemplate'))
+ if segment_template is not None:
+ extract_common(segment_template)
+ media = segment_template.get('media')
+ if media:
+ ms_info['media'] = media
+ initialization = segment_template.get('initialization')
+ if initialization:
+ ms_info['initialization'] = initialization
+ else:
+ extract_Initialization(segment_template)
+ return ms_info
+
+ mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
+ formats = []
+ for period in mpd_doc.findall(_add_ns('Period')):
+ period_duration = parse_duration(period.get('duration')) or mpd_duration
+ period_ms_info = extract_multisegment_info(period, {
+ 'start_number': 1,
+ 'timescale': 1,
+ })
+ for adaptation_set in period.findall(_add_ns('AdaptationSet')):
+ if is_drm_protected(adaptation_set):
+ continue
+ adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
+ for representation in adaptation_set.findall(_add_ns('Representation')):
+ if is_drm_protected(representation):
+ continue
+ representation_attrib = adaptation_set.attrib.copy()
+ representation_attrib.update(representation.attrib)
+ # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
+ mime_type = representation_attrib['mimeType']
+ content_type = mime_type.split('/')[0]
+ if content_type == 'text':
+ # TODO implement WebVTT downloading
+ pass
+ elif content_type in ('video', 'audio'):
+ base_url = ''
+ for element in (representation, adaptation_set, period, mpd_doc):
+ base_url_e = element.find(_add_ns('BaseURL'))
+ if base_url_e is not None:
+ base_url = base_url_e.text + base_url
+ if re.match(r'^https?://', base_url):
+ break
+ if mpd_base_url and not re.match(r'^https?://', base_url):
+ if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
+ mpd_base_url += '/'
+ base_url = mpd_base_url + base_url
+ representation_id = representation_attrib.get('id')
+ lang = representation_attrib.get('lang')
+ url_el = representation.find(_add_ns('BaseURL'))
+ filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
+ bandwidth = int_or_none(representation_attrib.get('bandwidth'))
+ f = {
+ 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
+ 'manifest_url': mpd_url,
+ 'ext': mimetype2ext(mime_type),
+ 'width': int_or_none(representation_attrib.get('width')),
+ 'height': int_or_none(representation_attrib.get('height')),
+ 'tbr': float_or_none(bandwidth, 1000),
+ 'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
+ 'fps': int_or_none(representation_attrib.get('frameRate')),
+ 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
+ 'format_note': 'DASH %s' % content_type,
+ 'filesize': filesize,
+ 'container': mimetype2ext(mime_type) + '_dash',
+ }
+ f.update(parse_codecs(representation_attrib.get('codecs')))
+ representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
+
+ def prepare_template(template_name, identifiers):
+ tmpl = representation_ms_info[template_name]
+                            # First of all, % characters outside $...$ templates
+                            # must be escaped by doubling for proper processing
+                            # by the % string formatting operator used further on (see
+                            # https://github.com/ytdl-org/youtube-dl/issues/16867).
+ t = ''
+ in_template = False
+ for c in tmpl:
+ t += c
+ if c == '$':
+ in_template = not in_template
+ elif c == '%' and not in_template:
+ t += c
+ # Next, $...$ templates are translated to their
+ # %(...) counterparts to be used with % operator
+ t = t.replace('$RepresentationID$', representation_id)
+ t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
+ t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
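+                            # e.g. (illustrative) 'seg_$Number%05d$.m4s' becomes
+                            # 'seg_%(Number)05d.m4s', so that t % {'Number': 42}
+                            # yields 'seg_00042.m4s'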
+                            # Finally, unescape literal dollar signs; note the
+                            # reassignment since str.replace returns a new string
+                            t = t.replace('$$', '$')
+ return t
+
+ # @initialization is a regular template like @media one
+ # so it should be handled just the same way (see
+ # https://github.com/ytdl-org/youtube-dl/issues/11605)
+ if 'initialization' in representation_ms_info:
+ initialization_template = prepare_template(
+ 'initialization',
+ # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
+ # $Time$ shall not be included for @initialization thus
+ # only $Bandwidth$ remains
+ ('Bandwidth', ))
+ representation_ms_info['initialization_url'] = initialization_template % {
+ 'Bandwidth': bandwidth,
+ }
+
+ def location_key(location):
+ return 'url' if re.match(r'^https?://', location) else 'path'
+
+ if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
+
+ media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+ media_location_key = location_key(media_template)
+
+ # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
+ # can't be used at the same time
+ if '%(Number' in media_template and 's' not in representation_ms_info:
+ segment_duration = None
+ if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
+ segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
+ representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
+ representation_ms_info['fragments'] = [{
+ media_location_key: media_template % {
+ 'Number': segment_number,
+ 'Bandwidth': bandwidth,
+ },
+ 'duration': segment_duration,
+ } for segment_number in range(
+ representation_ms_info['start_number'],
+ representation_ms_info['total_number'] + representation_ms_info['start_number'])]
+ else:
+ # $Number*$ or $Time$ in media template with S list available
+ # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
+ # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
+ representation_ms_info['fragments'] = []
+ segment_time = 0
+ segment_d = None
+ segment_number = representation_ms_info['start_number']
+
+ def add_segment_url():
+ segment_url = media_template % {
+ 'Time': segment_time,
+ 'Bandwidth': bandwidth,
+ 'Number': segment_number,
+ }
+ representation_ms_info['fragments'].append({
+ media_location_key: segment_url,
+ 'duration': float_or_none(segment_d, representation_ms_info['timescale']),
+ })
+
+ for num, s in enumerate(representation_ms_info['s']):
+ segment_time = s.get('t') or segment_time
+ segment_d = s['d']
+ add_segment_url()
+ segment_number += 1
+ for r in range(s.get('r', 0)):
+ segment_time += segment_d
+ add_segment_url()
+ segment_number += 1
+ segment_time += segment_d
+ elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
+ # No media template
+ # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
+ # or any YouTube dashsegments video
+ fragments = []
+ segment_index = 0
+ timescale = representation_ms_info['timescale']
+ for s in representation_ms_info['s']:
+ duration = float_or_none(s['d'], timescale)
+ for r in range(s.get('r', 0) + 1):
+ segment_uri = representation_ms_info['segment_urls'][segment_index]
+ fragments.append({
+ location_key(segment_uri): segment_uri,
+ 'duration': duration,
+ })
+ segment_index += 1
+ representation_ms_info['fragments'] = fragments
+ elif 'segment_urls' in representation_ms_info:
+ # Segment URLs with no SegmentTimeline
+ # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
+ # https://github.com/ytdl-org/youtube-dl/pull/14844
+ fragments = []
+ segment_duration = float_or_none(
+ representation_ms_info['segment_duration'],
+ representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
+ for segment_url in representation_ms_info['segment_urls']:
+ fragment = {
+ location_key(segment_url): segment_url,
+ }
+ if segment_duration:
+ fragment['duration'] = segment_duration
+ fragments.append(fragment)
+ representation_ms_info['fragments'] = fragments
+                        # If a fragments key is available then we correctly recognized fragmented media.
+                        # Otherwise we assume unfragmented media with direct access. Technically, this
+                        # assumption is not necessarily correct since we may simply not support some
+                        # forms of fragmented media renditions yet, but for now we'll use this fallback.
+ if 'fragments' in representation_ms_info:
+ f.update({
+ # NB: mpd_url may be empty when MPD manifest is parsed from a string
+ 'url': mpd_url or base_url,
+ 'fragment_base_url': base_url,
+ 'fragments': [],
+ 'protocol': 'http_dash_segments',
+ })
+ if 'initialization_url' in representation_ms_info:
+ initialization_url = representation_ms_info['initialization_url']
+ if not f.get('url'):
+ f['url'] = initialization_url
+ f['fragments'].append({location_key(initialization_url): initialization_url})
+ f['fragments'].extend(representation_ms_info['fragments'])
+ else:
+ # Assuming direct URL to unfragmented media.
+ f['url'] = base_url
+ formats.append(f)
+ else:
+ self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
+ return formats
+
+ def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+ res = self._download_xml_handle(
+ ism_url, video_id,
+ note=note or 'Downloading ISM manifest',
+ errnote=errnote or 'Failed to download ISM manifest',
+ fatal=fatal, data=data, headers=headers, query=query)
+ if res is False:
+ return []
+ ism_doc, urlh = res
+ if ism_doc is None:
+ return []
+
+ return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
+
+ def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+ """
+ Parse formats from ISM manifest.
+ References:
+ 1. [MS-SSTR]: Smooth Streaming Protocol,
+ https://msdn.microsoft.com/en-us/library/ff469518.aspx
+ """
+ if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
+ return []
+
+ duration = int(ism_doc.attrib['Duration'])
+ timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
+
+ formats = []
+ for stream in ism_doc.findall('StreamIndex'):
+ stream_type = stream.get('Type')
+ if stream_type not in ('video', 'audio'):
+ continue
+ url_pattern = stream.attrib['Url']
+ stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
+ stream_name = stream.get('Name')
+ for track in stream.findall('QualityLevel'):
+ fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
+ # TODO: add support for WVC1 and WMAP
+ if fourcc not in ('H264', 'AVC1', 'AACL'):
+ self.report_warning('%s is not a supported codec' % fourcc)
+ continue
+ tbr = int(track.attrib['Bitrate']) // 1000
+ # [1] does not mention Width and Height attributes. However,
+ # they're often present while MaxWidth and MaxHeight are
+                # missing, so they should be used as fallbacks
+ width = int_or_none(track.get('MaxWidth') or track.get('Width'))
+ height = int_or_none(track.get('MaxHeight') or track.get('Height'))
+ sampling_rate = int_or_none(track.get('SamplingRate'))
+
+ track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
+ track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
+
+ fragments = []
+ fragment_ctx = {
+ 'time': 0,
+ }
+ stream_fragments = stream.findall('c')
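+                # Each <c> element describes one or more fragments, e.g. (illustrative)
+                # <c t="0" d="20000000" r="2"/> yields two fragments of d/timescale
+                # seconds each, starting at t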
+ for stream_fragment_index, stream_fragment in enumerate(stream_fragments):
+ fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time']
+ fragment_repeat = int_or_none(stream_fragment.get('r')) or 1
+ fragment_ctx['duration'] = int_or_none(stream_fragment.get('d'))
+ if not fragment_ctx['duration']:
+ try:
+                            next_fragment_time = int(stream_fragments[stream_fragment_index + 1].attrib['t'])
+ except IndexError:
+ next_fragment_time = duration
+ fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
+ for _ in range(fragment_repeat):
+ fragments.append({
+ 'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
+ 'duration': fragment_ctx['duration'] / stream_timescale,
+ })
+ fragment_ctx['time'] += fragment_ctx['duration']
+
+ format_id = []
+ if ism_id:
+ format_id.append(ism_id)
+ if stream_name:
+ format_id.append(stream_name)
+ format_id.append(compat_str(tbr))
+
+ formats.append({
+ 'format_id': '-'.join(format_id),
+ 'url': ism_url,
+ 'manifest_url': ism_url,
+ 'ext': 'ismv' if stream_type == 'video' else 'isma',
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'asr': sampling_rate,
+ 'vcodec': 'none' if stream_type == 'audio' else fourcc,
+ 'acodec': 'none' if stream_type == 'video' else fourcc,
+ 'protocol': 'ism',
+ 'fragments': fragments,
+ '_download_params': {
+ 'duration': duration,
+ 'timescale': stream_timescale,
+ 'width': width or 0,
+ 'height': height or 0,
+ 'fourcc': fourcc,
+ 'codec_private_data': track.get('CodecPrivateData'),
+ 'sampling_rate': sampling_rate,
+ 'channels': int_or_none(track.get('Channels', 2)),
+ 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
+ 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
+ },
+ })
+ return formats
+
+ def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
+ def absolute_url(item_url):
+ return urljoin(base_url, item_url)
+
+ def parse_content_type(content_type):
+ if not content_type:
+ return {}
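+            # e.g. (illustrative): 'video/mp4; codecs="avc1.4d401e, mp4a.40.2"'
+            # yields {'ext': 'mp4', 'vcodec': 'avc1.4d401e', 'acodec': 'mp4a.40.2'}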
+ ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
+ if ctr:
+ mimetype, codecs = ctr.groups()
+ f = parse_codecs(codecs)
+ f['ext'] = mimetype2ext(mimetype)
+ return f
+ return {}
+
+ def _media_formats(src, cur_media_type, type_info={}):
+ full_url = absolute_url(src)
+ ext = type_info.get('ext') or determine_ext(full_url)
+ if ext == 'm3u8':
+ is_plain_url = False
+ formats = self._extract_m3u8_formats(
+ full_url, video_id, ext='mp4',
+ entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
+ preference=preference, fatal=False)
+ elif ext == 'mpd':
+ is_plain_url = False
+ formats = self._extract_mpd_formats(
+ full_url, video_id, mpd_id=mpd_id, fatal=False)
+ else:
+ is_plain_url = True
+ formats = [{
+ 'url': full_url,
+ 'vcodec': 'none' if cur_media_type == 'audio' else None,
+ }]
+ return is_plain_url, formats
+
+ entries = []
+ # amp-video and amp-audio are very similar to their HTML5 counterparts
+        # so we will include them right here (see
+ # https://www.ampproject.org/docs/reference/components/amp-video)
+ # For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
+ _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
+ media_tags = [(media_tag, media_tag_name, media_type, '')
+ for media_tag, media_tag_name, media_type
+ in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
+ media_tags.extend(re.findall(
+ # We only allow video|audio followed by a whitespace or '>'.
+ # Allowing more characters may end up in significant slow down (see
+ # https://github.com/ytdl-org/youtube-dl/issues/11979, example URL:
+ # http://www.porntrex.com/maps/videositemap.xml).
+ r'(?s)(<(?P<tag>%s)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>' % _MEDIA_TAG_NAME_RE, webpage))
+ for media_tag, _, media_type, media_content in media_tags:
+ media_info = {
+ 'formats': [],
+ 'subtitles': {},
+ }
+ media_attributes = extract_attributes(media_tag)
+ src = strip_or_none(media_attributes.get('src'))
+ if src:
+ _, formats = _media_formats(src, media_type)
+ media_info['formats'].extend(formats)
+ media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
+ if media_content:
+ for source_tag in re.findall(r'<source[^>]+>', media_content):
+ s_attr = extract_attributes(source_tag)
+                    # data-video-src and data-src are non-standard but have been
+                    # seen several times in the wild
+ src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
+ if not src:
+ continue
+ f = parse_content_type(s_attr.get('type'))
+ is_plain_url, formats = _media_formats(src, media_type, f)
+ if is_plain_url:
+ # width, height, res, label and title attributes are
+                        # all non-standard but have been seen several times in the wild
+ labels = [
+ s_attr.get(lbl)
+ for lbl in ('label', 'title')
+ if str_or_none(s_attr.get(lbl))
+ ]
+ width = int_or_none(s_attr.get('width'))
+ height = (int_or_none(s_attr.get('height'))
+ or int_or_none(s_attr.get('res')))
+ if not width or not height:
+ for lbl in labels:
+ resolution = parse_resolution(lbl)
+ if not resolution:
+ continue
+ width = width or resolution.get('width')
+ height = height or resolution.get('height')
+ for lbl in labels:
+ tbr = parse_bitrate(lbl)
+ if tbr:
+ break
+ else:
+ tbr = None
+ f.update({
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'format_id': s_attr.get('label') or s_attr.get('title'),
+ })
+ f.update(formats[0])
+ media_info['formats'].append(f)
+ else:
+ media_info['formats'].extend(formats)
+ for track_tag in re.findall(r'<track[^>]+>', media_content):
+ track_attributes = extract_attributes(track_tag)
+ kind = track_attributes.get('kind')
+ if not kind or kind in ('subtitles', 'captions'):
+ src = strip_or_none(track_attributes.get('src'))
+ if not src:
+ continue
+ lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
+ media_info['subtitles'].setdefault(lang, []).append({
+ 'url': absolute_url(src),
+ })
+ for f in media_info['formats']:
+ f.setdefault('http_headers', {})['Referer'] = base_url
+ if media_info['formats'] or media_info['subtitles']:
+ entries.append(media_info)
+ return entries
+
+ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
+ signed = 'hdnea=' in manifest_url
+ if not signed:
+ # https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html
+ manifest_url = re.sub(
+ r'(?:b=[\d,-]+|(?:__a__|attributes)=off|__b__=\d+)&?',
+ '', manifest_url).strip('?')
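+            # e.g. (illustrative) '.../master.m3u8?b=100-800,1200&__a__=off' is
+            # reduced to '.../master.m3u8' before the per-protocol URLs are built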
+
+ formats = []
+
+ hdcore_sign = 'hdcore=3.7.0'
+ f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+ hds_host = hosts.get('hds')
+ if hds_host:
+ f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
+ if 'hdcore=' not in f4m_url:
+ f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
+ f4m_formats = self._extract_f4m_formats(
+ f4m_url, video_id, f4m_id='hds', fatal=False)
+ for entry in f4m_formats:
+ entry.update({'extra_param_to_segment_url': hdcore_sign})
+ formats.extend(f4m_formats)
+
+ m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+ hls_host = hosts.get('hls')
+ if hls_host:
+ m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ formats.extend(m3u8_formats)
+
+ http_host = hosts.get('http')
+ if http_host and m3u8_formats and not signed:
+ REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+'
+ qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
+ qualities_length = len(qualities)
+ if len(m3u8_formats) in (qualities_length, qualities_length + 1):
+ i = 0
+ for f in m3u8_formats:
+ if f['vcodec'] != 'none':
+ for protocol in ('http', 'https'):
+ http_f = f.copy()
+ del http_f['manifest_url']
+ http_url = re.sub(
+ REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url'])
+ http_f.update({
+ 'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
+ 'url': http_url,
+ 'protocol': protocol,
+ })
+ formats.append(http_f)
+ i += 1
+
+ return formats
+
+ def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
+ query = compat_urlparse.urlparse(url).query
+ url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
+ mobj = re.search(
+ r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
+ url_base = mobj.group('url')
+ http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base)
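+        # e.g. (illustrative) 'http://example.com/vod/mp4:clip.mp4/playlist.m3u8'
+        # is reduced to url_base '//example.com/vod/mp4:clip.mp4', from which the
+        # per-protocol manifest URLs are rebuilt below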
+ formats = []
+
+ def manifest_url(manifest):
+ m_url = '%s/%s' % (http_base_url, manifest)
+ if query:
+ m_url += '?%s' % query
+ return m_url
+
+ if 'm3u8' not in skip_protocols:
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url('playlist.m3u8'), video_id, 'mp4',
+ m3u8_entry_protocol, m3u8_id='hls', fatal=False))
+ if 'f4m' not in skip_protocols:
+ formats.extend(self._extract_f4m_formats(
+ manifest_url('manifest.f4m'),
+ video_id, f4m_id='hds', fatal=False))
+ if 'dash' not in skip_protocols:
+ formats.extend(self._extract_mpd_formats(
+ manifest_url('manifest.mpd'),
+ video_id, mpd_id='dash', fatal=False))
+ if re.search(r'(?:/smil:|\.smil)', url_base):
+ if 'smil' not in skip_protocols:
+ rtmp_formats = self._extract_smil_formats(
+ manifest_url('jwplayer.smil'),
+ video_id, fatal=False)
+ for rtmp_format in rtmp_formats:
+ rtsp_format = rtmp_format.copy()
+ rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
+ del rtsp_format['play_path']
+ del rtsp_format['ext']
+ rtsp_format.update({
+ 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
+ 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
+ 'protocol': 'rtsp',
+ })
+ formats.extend([rtmp_format, rtsp_format])
+ else:
+ for protocol in ('rtmp', 'rtsp'):
+ if protocol not in skip_protocols:
+ formats.append({
+ 'url': '%s:%s' % (protocol, url_base),
+ 'format_id': protocol,
+ 'protocol': protocol,
+ })
+ return formats
+
+ def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
+ mobj = re.search(
+ r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
+ webpage)
+ if mobj:
+ try:
+ jwplayer_data = self._parse_json(mobj.group('options'),
+ video_id=video_id,
+ transform_source=transform_source)
+ except ExtractorError:
+ pass
+ else:
+ if isinstance(jwplayer_data, dict):
+ return jwplayer_data
+
+ def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
+ jwplayer_data = self._find_jwplayer_data(
+ webpage, video_id, transform_source=js_to_json)
+ return self._parse_jwplayer_data(
+ jwplayer_data, video_id, *args, **kwargs)
+
+ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
+ m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+ # JWPlayer backward compatibility: flattened playlists
+ # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
+ if 'playlist' not in jwplayer_data:
+ jwplayer_data = {'playlist': [jwplayer_data]}
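+        # e.g. (illustrative) a legacy flattened config like
+        # {'file': 'x.mp4', 'title': 'T'} is treated as
+        # {'playlist': [{'file': 'x.mp4', 'title': 'T'}]}, and the missing
+        # 'sources' key is filled in per item below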
+
+ entries = []
+
+ # JWPlayer backward compatibility: single playlist item
+ # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
+ if not isinstance(jwplayer_data['playlist'], list):
+ jwplayer_data['playlist'] = [jwplayer_data['playlist']]
+
+ for video_data in jwplayer_data['playlist']:
+ # JWPlayer backward compatibility: flattened sources
+ # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
+ if 'sources' not in video_data:
+ video_data['sources'] = [video_data]
+
+ this_video_id = video_id or video_data['mediaid']
+
+ formats = self._parse_jwplayer_formats(
+ video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
+ mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
+
+ subtitles = {}
+ tracks = video_data.get('tracks')
+ if tracks and isinstance(tracks, list):
+ for track in tracks:
+ if not isinstance(track, dict):
+ continue
+ track_kind = track.get('kind')
+ if not track_kind or not isinstance(track_kind, compat_str):
+ continue
+ if track_kind.lower() not in ('captions', 'subtitles'):
+ continue
+ track_url = urljoin(base_url, track.get('file'))
+ if not track_url:
+ continue
+ subtitles.setdefault(track.get('label') or 'en', []).append({
+ 'url': self._proto_relative_url(track_url)
+ })
+
+ entry = {
+ 'id': this_video_id,
+ 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
+ 'description': clean_html(video_data.get('description')),
+ 'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
+ 'timestamp': int_or_none(video_data.get('pubdate')),
+ 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
+ 'subtitles': subtitles,
+ }
+ # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
+ if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
+ entry.update({
+ '_type': 'url_transparent',
+ 'url': formats[0]['url'],
+ })
+ else:
+ self._sort_formats(formats)
+ entry['formats'] = formats
+ entries.append(entry)
+ if len(entries) == 1:
+ return entries[0]
+ else:
+ return self.playlist_result(entries)
+
+ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
+ m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+ urls = []
+ formats = []
+ for source in jwplayer_sources_data:
+ if not isinstance(source, dict):
+ continue
+ source_url = urljoin(
+ base_url, self._proto_relative_url(source.get('file')))
+ if not source_url or source_url in urls:
+ continue
+ urls.append(source_url)
+ source_type = source.get('type') or ''
+ ext = mimetype2ext(source_type) or determine_ext(source_url)
+ if source_type == 'hls' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=m3u8_id, fatal=False))
+ elif source_type == 'dash' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ source_url, video_id, mpd_id=mpd_id, fatal=False))
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ source_url, video_id, fatal=False))
+ # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
+ elif source_type.startswith('audio') or ext in (
+ 'oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
+ formats.append({
+ 'url': source_url,
+ 'vcodec': 'none',
+ 'ext': ext,
+ })
+ else:
+ height = int_or_none(source.get('height'))
+ if height is None:
+                    # Often no height is provided but there is a label in a
+                    # format like "1080p", "720p SD", or 1080.
+ height = int_or_none(self._search_regex(
+ r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
+ 'height', default=None))
+ a_format = {
+ 'url': source_url,
+ 'width': int_or_none(source.get('width')),
+ 'height': height,
+ 'tbr': int_or_none(source.get('bitrate')),
+ 'ext': ext,
+ }
+ if source_url.startswith('rtmp'):
+ a_format['ext'] = 'flv'
+ # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
+ # of jwplayer.flash.swf
+ rtmp_url_parts = re.split(
+ r'((?:mp4|mp3|flv):)', source_url, 1)
+ if len(rtmp_url_parts) == 3:
+ rtmp_url, prefix, play_path = rtmp_url_parts
+ a_format.update({
+ 'url': rtmp_url,
+ 'play_path': prefix + play_path,
+ })
+ if rtmp_params:
+ a_format.update(rtmp_params)
+ formats.append(a_format)
+ return formats
+
+ def _live_title(self, name):
+ """ Generate the title for a live video """
+ now = datetime.datetime.now()
+ now_str = now.strftime('%Y-%m-%d %H:%M')
+ return name + ' ' + now_str
+
+ def _int(self, v, name, fatal=False, **kwargs):
+ res = int_or_none(v, **kwargs)
+ if res is None:
+ msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+ if fatal:
+ raise ExtractorError(msg)
+ else:
+ self._downloader.report_warning(msg)
+ return res
+
+ def _float(self, v, name, fatal=False, **kwargs):
+ res = float_or_none(v, **kwargs)
+ if res is None:
+ msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+ if fatal:
+ raise ExtractorError(msg)
+ else:
+ self._downloader.report_warning(msg)
+ return res
+
+ def _set_cookie(self, domain, name, value, expire_time=None, port=None,
+ path='/', secure=False, discard=False, rest={}, **kwargs):
+ cookie = compat_cookiejar_Cookie(
+ 0, name, value, port, port is not None, domain, True,
+ domain.startswith('.'), path, True, secure, expire_time,
+ discard, None, None, rest)
+ self._downloader.cookiejar.set_cookie(cookie)
+
+ def _get_cookies(self, url):
+ """ Return a compat_cookies_SimpleCookie with the cookies for the url """
+ req = sanitized_Request(url)
+ self._downloader.cookiejar.add_cookie_header(req)
+ return compat_cookies_SimpleCookie(req.get_header('Cookie'))
+
+ def _apply_first_set_cookie_header(self, url_handle, cookie):
+ """
+ Apply first Set-Cookie header instead of the last. Experimental.
+
+        Some sites (e.g. [1-3]) may serve two cookies under the same name
+        in the Set-Cookie header and expect the first (old) one to be set rather
+        than the second (new) one. However, as per RFC 6265 the newer cookie
+        should be set into the cookie store, which is what actually happens.
+        We work around this issue by manually resetting the cookie to
+        the first one.
+ 1. https://new.vk.com/
+ 2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
+ 3. https://learning.oreilly.com/
+ """
+ for header, cookies in url_handle.headers.items():
+ if header.lower() != 'set-cookie':
+ continue
+ if sys.version_info[0] >= 3:
+ cookies = cookies.encode('iso-8859-1')
+ cookies = cookies.decode('utf-8')
+ cookie_value = re.search(
+ r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies)
+ if cookie_value:
+ value, domain = cookie_value.groups()
+ self._set_cookie(domain, cookie, value)
+ break
+
+ def get_testcases(self, include_onlymatching=False):
+ t = getattr(self, '_TEST', None)
+ if t:
+ assert not hasattr(self, '_TESTS'), \
+ '%s has _TEST and _TESTS' % type(self).__name__
+ tests = [t]
+ else:
+ tests = getattr(self, '_TESTS', [])
+ for t in tests:
+ if not include_onlymatching and t.get('only_matching', False):
+ continue
+ t['name'] = type(self).__name__[:-len('IE')]
+ yield t
+
+ def is_suitable(self, age_limit):
+ """ Test whether the extractor is generally suitable for the given
+ age limit (i.e. pornographic sites are not, all others usually are) """
+
+ any_restricted = False
+ for tc in self.get_testcases(include_onlymatching=False):
+ if tc.get('playlist', []):
+ tc = tc['playlist'][0]
+ is_restricted = age_restricted(
+ tc.get('info_dict', {}).get('age_limit'), age_limit)
+ if not is_restricted:
+ return True
+ any_restricted = any_restricted or is_restricted
+ return not any_restricted
+
+ def extract_subtitles(self, *args, **kwargs):
+ if (self._downloader.params.get('writesubtitles', False)
+ or self._downloader.params.get('listsubtitles')):
+ return self._get_subtitles(*args, **kwargs)
+ return {}
+
+ def _get_subtitles(self, *args, **kwargs):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ @staticmethod
+ def _merge_subtitle_items(subtitle_list1, subtitle_list2):
+ """ Merge subtitle items for one language. Items with duplicated URLs
+ will be dropped. """
+ list1_urls = set([item['url'] for item in subtitle_list1])
+ ret = list(subtitle_list1)
+ ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
+ return ret
+
+ @classmethod
+ def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
+ """ Merge two subtitle dictionaries, language by language. """
+ ret = dict(subtitle_dict1)
+ for lang in subtitle_dict2:
+ ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
+ return ret
+
+ def extract_automatic_captions(self, *args, **kwargs):
+ if (self._downloader.params.get('writeautomaticsub', False)
+ or self._downloader.params.get('listsubtitles')):
+ return self._get_automatic_captions(*args, **kwargs)
+ return {}
+
+ def _get_automatic_captions(self, *args, **kwargs):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def mark_watched(self, *args, **kwargs):
+ if (self._downloader.params.get('mark_watched', False)
+ and (self._get_login_info()[0] is not None
+ or self._downloader.params.get('cookiefile') is not None)):
+ self._mark_watched(*args, **kwargs)
+
+ def _mark_watched(self, *args, **kwargs):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def geo_verification_headers(self):
+ headers = {}
+ geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
+ if geo_verification_proxy:
+ headers['Ytdl-request-proxy'] = geo_verification_proxy
+ return headers
+
+ def _generic_id(self, url):
+ return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+
+ def _generic_title(self, url):
+ return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+
+
+class SearchInfoExtractor(InfoExtractor):
+ """
+    Base class for extractors of paged search queries.
+ They accept URLs in the format _SEARCH_KEY(|all|[0-9]):{query}
+ Instances should define _SEARCH_KEY and _MAX_RESULTS.
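+    e.g. with a hypothetical _SEARCH_KEY of 'examplesearch', 'examplesearch:cats'
+    fetches the first result, 'examplesearch5:cats' the first five and
+    'examplesearchall:cats' up to _MAX_RESULTS.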
+ """
+
+ @classmethod
+ def _make_valid_url(cls):
+ return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY
+
+ @classmethod
+ def suitable(cls, url):
+ return re.match(cls._make_valid_url(), url) is not None
+
+ def _real_extract(self, query):
+ mobj = re.match(self._make_valid_url(), query)
+ if mobj is None:
+ raise ExtractorError('Invalid search query "%s"' % query)
+
+ prefix = mobj.group('prefix')
+ query = mobj.group('query')
+ if prefix == '':
+ return self._get_n_results(query, 1)
+ elif prefix == 'all':
+ return self._get_n_results(query, self._MAX_RESULTS)
+ else:
+ n = int(prefix)
+ if n <= 0:
+ raise ExtractorError('invalid download number %s for query "%s"' % (n, query))
+ elif n > self._MAX_RESULTS:
+ self._downloader.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
+ n = self._MAX_RESULTS
+ return self._get_n_results(query, n)
+
+ def _get_n_results(self, query, n):
+ """Get a specified number of results for a query"""
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ @property
+ def SEARCH_KEY(self):
+ return self._SEARCH_KEY
diff --git a/hypervideo_dl/extractor/commonmistakes.py b/hypervideo_dl/extractor/commonmistakes.py
new file mode 100644
index 0000000..ed9d26e
--- /dev/null
+++ b/hypervideo_dl/extractor/commonmistakes.py
@@ -0,0 +1,50 @@
+from __future__ import unicode_literals
+
+import sys
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class CommonMistakesIE(InfoExtractor):
+ IE_DESC = False # Do not list
+ _VALID_URL = r'''(?x)
+ (?:url|URL)$
+ '''
+
+ _TESTS = [{
+ 'url': 'url',
+ 'only_matching': True,
+ }, {
+ 'url': 'URL',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ msg = (
+ 'You\'ve asked hypervideo to download the URL "%s". '
+ 'That doesn\'t make any sense. '
+ 'Simply remove the parameter in your command or configuration.'
+ ) % url
+ if not self._downloader.params.get('verbose'):
+ msg += ' Add -v to the command line to see what arguments and configuration hypervideo got.'
+ raise ExtractorError(msg, expected=True)
+
+
+class UnicodeBOMIE(InfoExtractor):
+ IE_DESC = False
+ _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
+
+ # Disable test for python 3.2 since BOM is broken in re in this version
+ # (see https://github.com/ytdl-org/youtube-dl/issues/9751)
+ _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
+ 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ real_url = self._match_id(url)
+ self.report_warning(
+ 'Your URL starts with a Byte Order Mark (BOM). '
+ 'Removing the BOM and looking for "%s" ...' % real_url)
+ return self.url_result(real_url)
diff --git a/hypervideo_dl/extractor/commonprotocols.py b/hypervideo_dl/extractor/commonprotocols.py
new file mode 100644
index 0000000..d98331a
--- /dev/null
+++ b/hypervideo_dl/extractor/commonprotocols.py
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urlparse,
+)
+
+
+class RtmpIE(InfoExtractor):
+ IE_DESC = False # Do not list
+ _VALID_URL = r'(?i)rtmp[est]?://.+'
+
+ _TESTS = [{
+ 'url': 'rtmp://cp44293.edgefcs.net/ondemand?auth=daEcTdydfdqcsb8cZcDbAaCbhamacbbawaS-bw7dBb-bWG-GqpGFqCpNCnGoyL&aifp=v001&slist=public/unsecure/audio/2c97899446428e4301471a8cb72b4b97--audio--pmg-20110908-0900a_flv_aac_med_int.mp4',
+ 'only_matching': True,
+ }, {
+ 'url': 'rtmp://edge.live.hitbox.tv/live/dimak',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._generic_id(url)
+ title = self._generic_title(url)
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': [{
+ 'url': url,
+ 'ext': 'flv',
+ 'format_id': compat_urlparse.urlparse(url).scheme,
+ }],
+ }
+
+
+class MmsIE(InfoExtractor):
+ IE_DESC = False # Do not list
+ _VALID_URL = r'(?i)mms://.+'
+
+ _TEST = {
+ # Direct MMS link
+ 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv',
+ 'info_dict': {
+ 'id': 'MilesReid(0709)',
+ 'ext': 'wmv',
+ 'title': 'MilesReid(0709)',
+ },
+ 'params': {
+ 'skip_download': True, # rtsp downloads, requiring mplayer or mpv
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._generic_id(url)
+ title = self._generic_title(url)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': url,
+ }
diff --git a/hypervideo_dl/extractor/condenast.py b/hypervideo_dl/extractor/condenast.py
new file mode 100644
index 0000000..d5e77af
--- /dev/null
+++ b/hypervideo_dl/extractor/condenast.py
@@ -0,0 +1,251 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlparse,
+ compat_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ orderedSet,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+)
+
+
+class CondeNastIE(InfoExtractor):
+ """
+    Condé Nast is a media group; some of its sites use a custom HTML5 player
+    that works the same way on all of them.
+ """
+
+ # The keys are the supported sites and the values are the name to be shown
+ # to the user and in the extractor description.
+ _SITES = {
+ 'allure': 'Allure',
+ 'architecturaldigest': 'Architectural Digest',
+ 'arstechnica': 'Ars Technica',
+ 'bonappetit': 'Bon Appétit',
+ 'brides': 'Brides',
+ 'cnevids': 'Condé Nast',
+ 'cntraveler': 'Condé Nast Traveler',
+ 'details': 'Details',
+ 'epicurious': 'Epicurious',
+ 'glamour': 'Glamour',
+ 'golfdigest': 'Golf Digest',
+ 'gq': 'GQ',
+ 'newyorker': 'The New Yorker',
+ 'self': 'SELF',
+ 'teenvogue': 'Teen Vogue',
+ 'vanityfair': 'Vanity Fair',
+ 'vogue': 'Vogue',
+ 'wired': 'WIRED',
+ 'wmagazine': 'W Magazine',
+ }
+
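+    # Matches either an embed/player URL carrying a 24-hex-digit video id
+    # (plus optional player id and target), or a watch/series/video page
+    # identified by its display id.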
+ _VALID_URL = r'''(?x)https?://(?:video|www|player(?:-backend)?)\.(?:%s)\.com/
+ (?:
+ (?:
+ embed(?:js)?|
+ (?:script|inline)/video
+ )/(?P<id>[0-9a-f]{24})(?:/(?P<player_id>[0-9a-f]{24}))?(?:.+?\btarget=(?P<target>[^&]+))?|
+ (?P<type>watch|series|video)/(?P<display_id>[^/?#]+)
+ )''' % '|'.join(_SITES.keys())
+ IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
+
+ EMBED_URL = r'(?:https?:)?//player(?:-backend)?\.(?:%s)\.com/(?:embed(?:js)?|(?:script|inline)/video)/.+?' % '|'.join(_SITES.keys())
+
+ _TESTS = [{
+ 'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
+ 'md5': '1921f713ed48aabd715691f774c451f7',
+ 'info_dict': {
+ 'id': '5171b343c2b4c00dd0c1ccb3',
+ 'ext': 'mp4',
+ 'title': '3D Printed Speakers Lit With LED',
+ 'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
+ 'uploader': 'wired',
+ 'upload_date': '20130314',
+ 'timestamp': 1363219200,
+ }
+ }, {
+ 'url': 'http://video.gq.com/watch/the-closer-with-keith-olbermann-the-only-true-surprise-trump-s-an-idiot?c=series',
+ 'info_dict': {
+ 'id': '58d1865bfd2e6126e2000015',
+ 'ext': 'mp4',
+ 'title': 'The Only True Surprise? Trump’s an Idiot',
+ 'uploader': 'gq',
+ 'upload_date': '20170321',
+ 'timestamp': 1490126427,
+ 'description': 'How much grimmer would things be if these people were competent?',
+ },
+ }, {
+ # JS embed
+ 'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js',
+ 'md5': 'f1a6f9cafb7083bab74a710f65d08999',
+ 'info_dict': {
+ 'id': '55f9cf8b61646d1acf00000c',
+ 'ext': 'mp4',
+ 'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
+ 'uploader': 'arstechnica',
+ 'upload_date': '20150916',
+ 'timestamp': 1442434920,
+ }
+ }, {
+ 'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://player-backend.cnevids.com/script/video/59138decb57ac36b83000005.js',
+ 'only_matching': True,
+ }]
+
+ def _extract_series(self, url, webpage):
+ title = self._html_search_regex(
+ r'(?s)<div class="cne-series-info">.*?<h1>(.+?)</h1>',
+ webpage, 'series title')
+ url_object = compat_urllib_parse_urlparse(url)
+ base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
+ m_paths = re.finditer(
+ r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
+ paths = orderedSet(m.group(1) for m in m_paths)
+ build_url = lambda path: compat_urlparse.urljoin(base_url, path)
+ entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
+ return self.playlist_result(entries, playlist_title=title)
+
+ def _extract_video_params(self, webpage, display_id):
+ query = self._parse_json(
+ self._search_regex(
+ r'(?s)var\s+params\s*=\s*({.+?})[;,]', webpage, 'player params',
+ default='{}'),
+ display_id, transform_source=js_to_json, fatal=False)
+ if query:
+ query['videoId'] = self._search_regex(
+ r'(?:data-video-id=|currentVideoId\s*=\s*)["\']([\da-f]+)',
+ webpage, 'video id', default=None)
+ else:
+ params = extract_attributes(self._search_regex(
+ r'(<[^>]+data-js="video-player"[^>]+>)',
+ webpage, 'player params element'))
+ query.update({
+ 'videoId': params['data-video'],
+ 'playerId': params['data-player'],
+ 'target': params['id'],
+ })
+ return query
+
+ def _extract_video(self, params):
+ video_id = params['videoId']
+
+ video_info = None
+
+ # New API path
+ query = params.copy()
+ query['embedType'] = 'inline'
+ info_page = self._download_json(
+ 'http://player.cnevids.com/embed-api.json', video_id,
+ 'Downloading embed info', fatal=False, query=query)
+
+ # Old fallbacks
+ if not info_page:
+ if params.get('playerId'):
+ info_page = self._download_json(
+ 'http://player.cnevids.com/player/video.js', video_id,
+ 'Downloading video info', fatal=False, query=params)
+ if info_page:
+ video_info = info_page.get('video')
+ if not video_info:
+ info_page = self._download_webpage(
+ 'http://player.cnevids.com/player/loader.js',
+ video_id, 'Downloading loader info', query=params)
+ if not video_info:
+ info_page = self._download_webpage(
+ 'https://player.cnevids.com/inline/video/%s.js' % video_id,
+ video_id, 'Downloading inline info', query={
+ 'target': params.get('target', 'embedplayer')
+ })
+
+ if not video_info:
+ video_info = self._parse_json(
+ self._search_regex(
+ r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'),
+ video_id, transform_source=js_to_json)['video']
+
+ title = video_info['title']
+
+ formats = []
+ for fdata in video_info['sources']:
+ src = fdata.get('src')
+ if not src:
+ continue
+ ext = mimetype2ext(fdata.get('type')) or determine_ext(src)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+ quality = fdata.get('quality')
+ formats.append({
+ 'format_id': ext + ('-%s' % quality if quality else ''),
+ 'url': src,
+ 'ext': ext,
+ 'quality': 1 if quality == 'high' else 0,
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for t, caption in video_info.get('captions', {}).items():
+ caption_url = caption.get('src')
+ if not (t in ('vtt', 'srt', 'tml') and caption_url):
+ continue
+ subtitles.setdefault('en', []).append({'url': caption_url})
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnail': video_info.get('poster_frame'),
+ 'uploader': video_info.get('brand'),
+ 'duration': int_or_none(video_info.get('duration')),
+ 'tags': video_info.get('tags'),
+ 'series': video_info.get('series_title'),
+ 'season': video_info.get('season_title'),
+ 'timestamp': parse_iso8601(video_info.get('premiere_date')),
+ 'categories': video_info.get('categories'),
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ video_id, player_id, target, url_type, display_id = re.match(self._VALID_URL, url).groups()
+
+ if video_id:
+ return self._extract_video({
+ 'videoId': video_id,
+ 'playerId': player_id,
+ 'target': target,
+ })
+
+ webpage = self._download_webpage(url, display_id)
+
+ if url_type == 'series':
+ return self._extract_series(url, webpage)
+ else:
+ video = try_get(self._parse_json(self._search_regex(
+ r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
+ 'preload state', '{}'), display_id),
+ lambda x: x['transformed']['video'])
+ if video:
+ params = {'videoId': video['id']}
+ info = {'description': strip_or_none(video.get('description'))}
+ else:
+ params = self._extract_video_params(webpage, display_id)
+ info = self._search_json_ld(
+ webpage, display_id, fatal=False)
+ info.update(self._extract_video(params))
+ return info
diff --git a/hypervideo_dl/extractor/contv.py b/hypervideo_dl/extractor/contv.py
new file mode 100644
index 0000000..84b462d
--- /dev/null
+++ b/hypervideo_dl/extractor/contv.py
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+)
+
+
+class CONtvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter',
+ 'info_dict': {
+ 'id': 'CEG10022949',
+ 'ext': 'mp4',
+ 'title': 'Days Of Thrills & Laughter',
+ 'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb',
+ 'upload_date': '20180703',
+ 'timestamp': 1530634789.61,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites',
+ 'info_dict': {
+ 'id': 'CLIP-show_fotld_bts',
+ 'title': 'Fight of the Living Dead: Behind the Scenes Bites',
+ },
+ 'playlist_mincount': 7,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ details = self._download_json(
+ 'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id,
+ video_id, query={'device': 'web'})
+
+ if details.get('type') == 'episodic':
+ seasons = self._download_json(
+ 'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id,
+ video_id)
+ entries = []
+ for season in seasons:
+ for episode in season.get('episodes', []):
+ episode_id = episode.get('id')
+ if not episode_id:
+ continue
+ entries.append(self.url_result(
+ 'https://www.contv.com/details-movie/' + episode_id,
+ CONtvIE.ie_key(), episode_id))
+ return self.playlist_result(entries, video_id, details.get('title'))
+
+ m_details = details['details']
+ title = details['title']
+
+ formats = []
+
+ media_hls_url = m_details.get('media_hls_url')
+ if media_hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ media_hls_url, video_id, 'mp4',
+ m3u8_id='hls', fatal=False))
+
+ media_mp4_url = m_details.get('media_mp4_url')
+ if media_mp4_url:
+ formats.append({
+ 'format_id': 'http',
+ 'url': media_mp4_url,
+ })
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ captions = m_details.get('captions') or {}
+ for caption_url in captions.values():
+ subtitles.setdefault('en', []).append({
+ 'url': caption_url
+ })
+
+ thumbnails = []
+ for image in m_details.get('images', []):
+ image_url = image.get('url')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
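+        # pick the most detailed description variant available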
+ description = None
+ for p in ('large_', 'medium_', 'small_', ''):
+ d = m_details.get(p + 'description')
+ if d:
+ description = d
+ break
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': description,
+ 'timestamp': float_or_none(details.get('metax_added_on'), 1000),
+ 'subtitles': subtitles,
+ 'duration': float_or_none(m_details.get('duration'), 1000),
+ 'view_count': int_or_none(details.get('num_watched')),
+ 'like_count': int_or_none(details.get('num_fav')),
+ 'categories': details.get('category'),
+ 'tags': details.get('tags'),
+ 'season_number': int_or_none(details.get('season')),
+ 'episode_number': int_or_none(details.get('episode')),
+ 'release_year': int_or_none(details.get('pub_year')),
+ }
diff --git a/hypervideo_dl/extractor/corus.py b/hypervideo_dl/extractor/corus.py
new file mode 100644
index 0000000..e11aadf
--- /dev/null
+++ b/hypervideo_dl/extractor/corus.py
@@ -0,0 +1,160 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .theplatform import ThePlatformFeedIE
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+)
+
+
+class CorusIE(ThePlatformFeedIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?P<domain>
+ (?:
+ globaltv|
+ etcanada|
+ seriesplus|
+ wnetwork|
+ ytv
+ )\.com|
+ (?:
+ hgtv|
+ foodnetwork|
+ slice|
+ history|
+ showcase|
+ bigbrothercanada|
+ abcspark|
+ disney(?:channel|lachaine)
+ )\.ca
+ )
+ /(?:[^/]+/)*
+ (?:
+ video\.html\?.*?\bv=|
+ videos?/(?:[^/]+/)*(?:[a-z0-9-]+-)?
+ )
+ (?P<id>
+ [\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}|
+ (?:[A-Z]{4})?\d{12,20}
+ )
+ '''
+ _TESTS = [{
+ 'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
+ 'info_dict': {
+ 'id': '870923331648',
+ 'ext': 'mp4',
+ 'title': 'Movie Night Popcorn with Bryan',
+ 'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
+ 'upload_date': '20170206',
+ 'timestamp': 1486392197,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to parse JSON'],
+ }, {
+ 'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bigbrothercanada.ca/video/1457812035894/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.seriesplus.com/emissions/dre-mary-mort-sur-ordonnance/videos/deux-coeurs-battant/SERP0055626330000200/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.disneychannel.ca/shows/gabby-duran-the-unsittables/video/crybaby-duran-clip/2f557eec-0588-11ea-ae2b-e2c6776b770e/',
+ 'only_matching': True
+ }]
+ _GEO_BYPASS = False
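+    # Maps a site slug to the path used by the Corus content API; unlisted
+    # slugs fall through unchanged, and every path except 'series' gains a
+    # 'migration/' prefix in _real_extract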
+ _SITE_MAP = {
+ 'globaltv': 'series',
+ 'etcanada': 'series',
+ 'foodnetwork': 'food',
+ 'bigbrothercanada': 'series',
+ 'disneychannel': 'disneyen',
+ 'disneylachaine': 'disneyfr',
+ }
+
+ def _real_extract(self, url):
+ domain, video_id = re.match(self._VALID_URL, url).groups()
+ site = domain.split('.')[0]
+ path = self._SITE_MAP.get(site, site)
+ if path != 'series':
+ path = 'migration/' + path
+ video = self._download_json(
+ 'https://globalcontent.corusappservices.com/templates/%s/playlist/' % path,
+ video_id, query={'byId': video_id},
+ headers={'Accept': 'application/json'})[0]
+ title = video['title']
+
+ formats = []
+ for source in video.get('sources', []):
+ smil_url = source.get('file')
+ if not smil_url:
+ continue
+ source_type = source.get('type')
+ note = 'Downloading%s smil file' % (' ' + source_type if source_type else '')
+ resp = self._download_webpage(
+ smil_url, video_id, note, fatal=False,
+ headers=self.geo_verification_headers())
+ if not resp:
+ continue
+ error = self._parse_json(resp, video_id, fatal=False)
+ if error:
+ if error.get('exception') == 'GeoLocationBlocked':
+ self.raise_geo_restricted(countries=['CA'])
+ raise ExtractorError(error['description'])
+ smil = self._parse_xml(resp, video_id, fatal=False)
+ if smil is None:
+ continue
+ namespace = self._parse_smil_namespace(smil)
+ formats.extend(self._parse_smil_formats(
+ smil, smil_url, video_id, namespace))
+ if not formats and video.get('drm'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for track in video.get('tracks', []):
+ track_url = track.get('file')
+ if not track_url:
+ continue
+ lang = 'fr' if site in ('disneylachaine', 'seriesplus') else 'en'
+ subtitles.setdefault(lang, []).append({'url': track_url})
+
+ metadata = video.get('metadata') or {}
+ get_number = lambda x: int_or_none(video.get('pl1$' + x) or metadata.get(x + 'Number'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': dict_get(video, ('defaultThumbnailUrl', 'thumbnail', 'image')),
+ 'description': video.get('description'),
+ 'timestamp': int_or_none(video.get('availableDate'), 1000),
+ 'subtitles': subtitles,
+ 'duration': float_or_none(metadata.get('duration')),
+ 'series': dict_get(video, ('show', 'pl1$show')),
+ 'season_number': get_number('season'),
+ 'episode_number': get_number('episode'),
+ }
diff --git a/hypervideo_dl/extractor/coub.py b/hypervideo_dl/extractor/coub.py
new file mode 100644
index 0000000..6ea03e6
--- /dev/null
+++ b/hypervideo_dl/extractor/coub.py
@@ -0,0 +1,140 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ qualities,
+)
+
+
+class CoubIE(InfoExtractor):
+ _VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P<id>[\da-z]+)'
+
+ _TESTS = [{
+ 'url': 'http://coub.com/view/5u5n1',
+ 'info_dict': {
+ 'id': '5u5n1',
+ 'ext': 'mp4',
+ 'title': 'The Matrix Moonwalk',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 4.6,
+ 'timestamp': 1428527772,
+ 'upload_date': '20150408',
+ 'uploader': 'Artyom Loskutnikov',
+ 'uploader_id': 'artyom.loskutnikov',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4',
+ 'only_matching': True,
+ }, {
+ 'url': 'coub:5u5n1',
+ 'only_matching': True,
+ }, {
+ # longer video id
+ 'url': 'http://coub.com/view/237d5l5h',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ coub = self._download_json(
+ 'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id)
+
+ if coub.get('error'):
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, coub['error']), expected=True)
+
+ title = coub['title']
+
+ file_versions = coub['file_versions']
+
+ QUALITIES = ('low', 'med', 'high')
+
+ MOBILE = 'mobile'
+ IPHONE = 'iphone'
+ HTML5 = 'html5'
+
+ SOURCE_PREFERENCE = (MOBILE, IPHONE, HTML5)
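+        # qualities() ranks later entries higher, so html5 sources are
+        # preferred over iphone, then mobile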
+
+ quality_key = qualities(QUALITIES)
+ preference_key = qualities(SOURCE_PREFERENCE)
+
+ formats = []
+
+ for kind, items in file_versions.get(HTML5, {}).items():
+ if kind not in ('video', 'audio'):
+ continue
+ if not isinstance(items, dict):
+ continue
+ for quality, item in items.items():
+ if not isinstance(item, dict):
+ continue
+ item_url = item.get('url')
+ if not item_url:
+ continue
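+                # yields format ids like 'html5-video-high'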
+ formats.append({
+ 'url': item_url,
+ 'format_id': '%s-%s-%s' % (HTML5, kind, quality),
+ 'filesize': int_or_none(item.get('size')),
+ 'vcodec': 'none' if kind == 'audio' else None,
+ 'quality': quality_key(quality),
+ 'preference': preference_key(HTML5),
+ })
+
+ iphone_url = file_versions.get(IPHONE, {}).get('url')
+ if iphone_url:
+ formats.append({
+ 'url': iphone_url,
+ 'format_id': IPHONE,
+ 'preference': preference_key(IPHONE),
+ })
+
+ mobile_url = file_versions.get(MOBILE, {}).get('audio_url')
+ if mobile_url:
+ formats.append({
+ 'url': mobile_url,
+ 'format_id': '%s-audio' % MOBILE,
+ 'preference': preference_key(MOBILE),
+ })
+
+ self._sort_formats(formats)
+
+ thumbnail = coub.get('picture')
+ duration = float_or_none(coub.get('duration'))
+ timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at'))
+ uploader = coub.get('channel', {}).get('title')
+ uploader_id = coub.get('channel', {}).get('permalink')
+
+ view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count'))
+ like_count = int_or_none(coub.get('likes_count'))
+ repost_count = int_or_none(coub.get('recoubs_count'))
+
+ age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin'))
+ if age_restricted is not None:
+ age_limit = 18 if age_restricted is True else 0
+ else:
+ age_limit = None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'repost_count': repost_count,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/cracked.py b/hypervideo_dl/extractor/cracked.py
new file mode 100644
index 0000000..f77a68e
--- /dev/null
+++ b/hypervideo_dl/extractor/cracked.py
@@ -0,0 +1,90 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ parse_iso8601,
+ str_to_int,
+)
+
+
+class CrackedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'
+ _TESTS = [{
+ 'url': 'http://www.cracked.com/video_19070_if-animal-actors-got-e21-true-hollywood-stories.html',
+ 'md5': '89b90b9824e3806ca95072c4d78f13f7',
+ 'info_dict': {
+ 'id': '19070',
+ 'ext': 'mp4',
+ 'title': 'If Animal Actors Got E! True Hollywood Stories',
+ 'timestamp': 1404954000,
+ 'upload_date': '20140710',
+ }
+ }, {
+ # youtube embed
+ 'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
+ 'md5': 'ccd52866b50bde63a6ef3b35016ba8c7',
+ 'info_dict': {
+ 'id': 'EjI00A3rZD0',
+ 'ext': 'mp4',
+ 'title': "4 Plot Holes You Didn't Notice in Your Favorite Movies - The Spit Take",
+ 'description': 'md5:c603708c718b796fe6079e2b3351ffc7',
+ 'upload_date': '20140725',
+ 'uploader_id': 'Cracked',
+ 'uploader': 'Cracked',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ youtube_url = YoutubeIE._extract_url(webpage)
+ if youtube_url:
+ return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
+
+ video_url = self._html_search_regex(
+ [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
+ webpage, 'video URL')
+
+ title = self._search_regex(
+ [r'property="?og:title"?\s+content="([^"]+)"', r'class="?title"?>([^<]+)'],
+ webpage, 'title')
+
+ description = self._search_regex(
+ r'name="?(?:og:)?description"?\s+content="([^"]+)"',
+ webpage, 'description', default=None)
+
+ timestamp = self._html_search_regex(
+ r'"date"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False)
+ if timestamp:
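+            # the trailing 6 characters are a UTC offset (e.g. '-08:00'); drop them before parsing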
+ timestamp = parse_iso8601(timestamp[:-6])
+
+ view_count = str_to_int(self._html_search_regex(
+ r'<span\s+class="?views"? id="?viewCounts"?>([\d,\.]+) Views</span>',
+ webpage, 'view count', fatal=False))
+ comment_count = str_to_int(self._html_search_regex(
+ r'<span\s+id="?commentCounts"?>([\d,\.]+)</span>',
+ webpage, 'comment count', fatal=False))
+
+ m = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url)
+ if m:
+ width = int(m.group('width'))
+ height = int(m.group('height'))
+ else:
+ width = height = None
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'height': height,
+ 'width': width,
+ }
diff --git a/hypervideo_dl/extractor/crackle.py b/hypervideo_dl/extractor/crackle.py
new file mode 100644
index 0000000..49bf3a4
--- /dev/null
+++ b/hypervideo_dl/extractor/crackle.py
@@ -0,0 +1,200 @@
+# coding: utf-8
+from __future__ import unicode_literals, division
+
+import hashlib
+import hmac
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ parse_age_limit,
+ parse_duration,
+ url_or_none,
+ ExtractorError
+)
+
+
+class CrackleIE(InfoExtractor):
+ _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
+ _TESTS = [{
+ # geo restricted to CA
+ 'url': 'https://www.crackle.com/andromeda/2502343',
+ 'info_dict': {
+ 'id': '2502343',
+ 'ext': 'mp4',
+ 'title': 'Under The Night',
+ 'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
+ 'duration': 2583,
+ 'view_count': int,
+ 'average_rating': 0,
+ 'age_limit': 14,
+ 'genre': 'Action, Sci-Fi',
+ 'creator': 'Allan Kroeker',
+ 'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
+ 'release_year': 2000,
+ 'series': 'Andromeda',
+ 'episode': 'Under The Night',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.sonycrackle.com/andromeda/2502343',
+ 'only_matching': True,
+ }]
+
+ _MEDIA_FILE_SLOTS = {
+ '360p.mp4': {
+ 'width': 640,
+ 'height': 360,
+ },
+ '480p.mp4': {
+ 'width': 768,
+ 'height': 432,
+ },
+ '480p_1mbps.mp4': {
+ 'width': 852,
+ 'height': 480,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ country_code = self._downloader.params.get('geo_bypass_country', None)
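+        # If no country is forced via --geo-bypass-country, probe the US and
+        # its outlying territories plus AU and CA until one is not geo-blocked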
+ countries = [country_code] if country_code else (
+ 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')
+
+ last_e = None
+
+ for country in countries:
+ try:
+                # The Authorization header algorithm was reverse engineered from:
+ # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
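+                # The token is an HMAC-SHA1 of '<details URL>|<UTC timestamp>'
+                # keyed with a static secret; the header is
+                # '<hex digest>|<timestamp>|117|1'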
+ media_detail_url = 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country)
+ timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
+ h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
+ media = self._download_json(
+ media_detail_url, video_id, 'Downloading media JSON as %s' % country,
+ 'Unable to download media JSON', headers={
+ 'Accept': 'application/json',
+ 'Authorization': '|'.join([h, timestamp, '117', '1']),
+ })
+ except ExtractorError as e:
+ # 401 means geo restriction, trying next country
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ last_e = e
+ continue
+ raise
+
+ media_urls = media.get('MediaURLs')
+ if not media_urls or not isinstance(media_urls, list):
+ continue
+
+ title = media['Title']
+
+ formats = []
+            for e in media_urls:
+ if e.get('UseDRM') is True:
+ continue
+ format_url = url_or_none(e.get('Path'))
+ if not format_url:
+ continue
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ elif format_url.endswith('.ism/Manifest'):
+ formats.extend(self._extract_ism_formats(
+ format_url, video_id, ism_id='mss', fatal=False))
+ else:
+ mfs_path = e.get('Type')
+ mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
+ if not mfs_info:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'format_id': 'http-' + mfs_path.split('.')[0],
+ 'width': mfs_info['width'],
+ 'height': mfs_info['height'],
+ })
+ self._sort_formats(formats)
+
+ description = media.get('Description')
+ duration = int_or_none(media.get(
+ 'DurationInSeconds')) or parse_duration(media.get('Duration'))
+ view_count = int_or_none(media.get('CountViews'))
+ average_rating = float_or_none(media.get('UserRating'))
+ age_limit = parse_age_limit(media.get('Rating'))
+ genre = media.get('Genre')
+ release_year = int_or_none(media.get('ReleaseYear'))
+ creator = media.get('Directors')
+ artist = media.get('Cast')
+
+ if media.get('MediaTypeDisplayValue') == 'Full Episode':
+ series = media.get('ShowName')
+ episode = title
+ season_number = int_or_none(media.get('Season'))
+ episode_number = int_or_none(media.get('Episode'))
+ else:
+ series = episode = season_number = episode_number = None
+
+ subtitles = {}
+ cc_files = media.get('ClosedCaptionFiles')
+ if isinstance(cc_files, list):
+ for cc_file in cc_files:
+ if not isinstance(cc_file, dict):
+ continue
+ cc_url = url_or_none(cc_file.get('Path'))
+ if not cc_url:
+ continue
+ lang = cc_file.get('Locale') or 'en'
+ subtitles.setdefault(lang, []).append({'url': cc_url})
+
+ thumbnails = []
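+            # 'Images' maps keys like 'Img_<width>x<height>' to image URLs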
+ images = media.get('Images')
+            if isinstance(images, dict):
+ for image_key, image_url in images.items():
+ mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
+ if not mobj:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'average_rating': average_rating,
+ 'age_limit': age_limit,
+ 'genre': genre,
+ 'creator': creator,
+ 'artist': artist,
+ 'release_year': release_year,
+ 'series': series,
+ 'episode': episode,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
+
+ raise last_e
diff --git a/hypervideo_dl/extractor/crooksandliars.py b/hypervideo_dl/extractor/crooksandliars.py
new file mode 100644
index 0000000..7fb782d
--- /dev/null
+++ b/hypervideo_dl/extractor/crooksandliars.py
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ qualities,
+)
+
+
+class CrooksAndLiarsIE(InfoExtractor):
+ _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi',
+ 'info_dict': {
+ 'id': '8RUoRhRi',
+ 'ext': 'mp4',
+ 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!',
+ 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1428207000,
+ 'upload_date': '20150405',
+ 'uploader': 'Heather',
+ 'duration': 236,
+ }
+ }, {
+ 'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)
+
+ manifest = self._parse_json(
+ self._search_regex(
+ r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'),
+ video_id)
+
+ quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))
+
+ formats = [{
+ 'url': item['url'],
+ 'format_id': item['type'],
+ 'quality': quality(item['type']),
+ } for item in manifest['flavors'] if item['mime'].startswith('video/')]
+ self._sort_formats(formats)
+
+ return {
+ 'url': url,
+ 'id': video_id,
+ 'title': manifest['title'],
+ 'description': manifest.get('description'),
+ 'thumbnail': self._proto_relative_url(manifest.get('poster')),
+ 'timestamp': int_or_none(manifest.get('created')),
+ 'uploader': manifest.get('author'),
+ 'duration': int_or_none(manifest.get('duration')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/crunchyroll.py b/hypervideo_dl/extractor/crunchyroll.py
new file mode 100644
index 0000000..bc2d1fa
--- /dev/null
+++ b/hypervideo_dl/extractor/crunchyroll.py
@@ -0,0 +1,686 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+import zlib
+
+from hashlib import sha1
+from math import pow, sqrt, floor
+from .common import InfoExtractor
+from .vrv import VRVIE
+from ..compat import (
+ compat_b64decode,
+ compat_etree_Element,
+ compat_etree_fromstring,
+ compat_str,
+ compat_urllib_parse_urlencode,
+ compat_urllib_request,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ bytes_to_intlist,
+ extract_attributes,
+ float_or_none,
+ intlist_to_bytes,
+ int_or_none,
+ lowercase_escape,
+ merge_dicts,
+ remove_end,
+ sanitized_Request,
+ urlencode_postdata,
+ xpath_text,
+)
+from ..aes import (
+ aes_cbc_decrypt,
+)
+
+
+class CrunchyrollBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.crunchyroll.com/login'
+ _LOGIN_FORM = 'login_form'
+ _NETRC_MACHINE = 'crunchyroll'
+
+ def _call_rpc_api(self, method, video_id, note=None, data=None):
+ data = data or {}
+ data['req'] = 'RpcApi' + method
+ data = compat_urllib_parse_urlencode(data).encode('utf-8')
+ return self._download_xml(
+ 'https://www.crunchyroll.com/xml/',
+ video_id, note, fatal=False, data=data, headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ def is_logged(webpage):
+ return 'href="/logout"' in webpage
+
+ # Already logged in
+ if is_logged(login_page):
+ return
+
+ login_form_str = self._search_regex(
+ r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
+ login_page, 'login form', group='form')
+
+ post_url = extract_attributes(login_form_str).get('action')
+ if not post_url:
+ post_url = self._LOGIN_URL
+ elif not post_url.startswith('http'):
+ post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
+ login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page)
+
+ login_form.update({
+ 'login_form[name]': username,
+ 'login_form[password]': password,
+ })
+
+ response = self._download_webpage(
+ post_url, None, 'Logging in', 'Wrong login info',
+ data=urlencode_postdata(login_form),
+ headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+ # Successful login
+ if is_logged(response):
+ return
+
+ error = self._html_search_regex(
+ '(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+
+ raise ExtractorError('Unable to log in')
+
+ def _real_initialize(self):
+ self._login()
+
+ @staticmethod
+ def _add_skip_wall(url):
+ parsed_url = compat_urlparse.urlparse(url)
+ qs = compat_urlparse.parse_qs(parsed_url.query)
+        # Always force skip_wall to bypass the maturity wall, i.e. the 18+ confirmation message:
+        # > This content may be inappropriate for some people.
+        # > Are you sure you want to continue?
+        # It is not disabled by default in a Crunchyroll account's settings.
+        # See https://github.com/ytdl-org/youtube-dl/issues/7202.
+ qs['skip_wall'] = ['1']
+ return compat_urlparse.urlunparse(
+ parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
+
+class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
+ IE_NAME = 'crunchyroll'
+ _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+ _TESTS = [{
+ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
+ 'info_dict': {
+ 'id': '645513',
+ 'ext': 'mp4',
+ 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
+ 'description': 'md5:2d17137920c64f2f49981a7797d275ef',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Yomiuri Telecasting Corporation (YTV)',
+ 'upload_date': '20131013',
+ 'url': 're:(?!.*&amp)',
+ },
+ 'params': {
+ # rtmp
+ 'skip_download': True,
+ },
+ 'skip': 'Video gone',
+ }, {
+ 'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
+ 'info_dict': {
+ 'id': '589804',
+ 'ext': 'flv',
+ 'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
+ 'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Danny Choo Network',
+ 'upload_date': '20120213',
+ },
+ 'params': {
+ # rtmp
+ 'skip_download': True,
+ },
+ 'skip': 'Video gone',
+ }, {
+ 'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
+ 'info_dict': {
+ 'id': '702409',
+ 'ext': 'mp4',
+ 'title': compat_str,
+ 'description': compat_str,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Re:Zero Partners',
+ 'timestamp': 1462098900,
+ 'upload_date': '20160501',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
+ 'info_dict': {
+ 'id': '727589',
+ 'ext': 'mp4',
+ 'title': compat_str,
+ 'description': compat_str,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Kadokawa Pictures Inc.',
+ 'timestamp': 1484130900,
+ 'upload_date': '20170111',
+ 'series': compat_str,
+ 'season': "KONOSUBA -God's blessing on this wonderful world! 2",
+ 'season_number': 2,
+ 'episode': 'Give Me Deliverance From This Judicial Injustice!',
+ 'episode_number': 1,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
+ 'only_matching': True,
+ }, {
+ # geo-restricted (US), 18+ maturity wall, non-premium available
+ 'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
+ 'only_matching': True,
+ }, {
+ # A description with double quotes
+ 'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
+ 'info_dict': {
+ 'id': '535080',
+ 'ext': 'mp4',
+ 'title': compat_str,
+ 'description': compat_str,
+ 'uploader': 'Marvelous AQL Inc.',
+ 'timestamp': 1255512600,
+ 'upload_date': '20091014',
+ },
+ 'params': {
+ # Just test metadata extraction
+ 'skip_download': True,
+ },
+ }, {
+ # make sure we can extract an uploader name that's not a link
+ 'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
+ 'info_dict': {
+ 'id': '606899',
+ 'ext': 'mp4',
+ 'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors',
+ 'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
+ 'uploader': 'Geneon Entertainment',
+ 'upload_date': '20120717',
+ },
+ 'params': {
+ # just test metadata extraction
+ 'skip_download': True,
+ },
+ 'skip': 'Video gone',
+ }, {
+ # A video with a vastly different season name compared to the series name
+ 'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
+ 'info_dict': {
+ 'id': '590532',
+ 'ext': 'mp4',
+ 'title': compat_str,
+ 'description': compat_str,
+ 'uploader': 'TV TOKYO',
+ 'timestamp': 1330956000,
+ 'upload_date': '20120305',
+ 'series': 'Nyarko-san: Another Crawling Chaos',
+ 'season': 'Haiyoru! Nyaruani (ONA)',
+ },
+ 'params': {
+ # Just test metadata extraction
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.crunchyroll.com/media-723735',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
+ 'only_matching': True,
+ }]
+
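+    # Maps video height to the (video_quality, video_format) ids used by the
+    # legacy RpcApi fallback below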
+ _FORMAT_IDS = {
+ '360': ('60', '106'),
+ '480': ('61', '106'),
+ '720': ('62', '106'),
+ '1080': ('80', '108'),
+ }
+
+ def _download_webpage(self, url_or_request, *args, **kwargs):
+ request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
+ else sanitized_Request(url_or_request))
+        # Accept-Language must be set explicitly to accept any language, to avoid issues
+        # similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
+        # Along with the IP address, Crunchyroll uses Accept-Language to guess whether
+        # georestriction should be imposed (from what I can see it just takes the first
+        # language, ignoring the priority, and requires it to correspond to the IP).
+        # This breaks Crunchyroll in browsers that don't place the locale language first
+        # in the header, but accepting any language works around the issue.
+ request.add_header('Accept-Language', '*')
+ return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
+
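+    # Subtitle payloads are base64-encoded, AES-CBC-encrypted and
+    # zlib-compressed; the key is derived from the subtitle id
+    # (see obfuscate_key below)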
+ def _decrypt_subtitles(self, data, iv, id):
+ data = bytes_to_intlist(compat_b64decode(data))
+ iv = bytes_to_intlist(compat_b64decode(iv))
+ id = int(id)
+
+ def obfuscate_key_aux(count, modulo, start):
+ output = list(start)
+ for _ in range(count):
+ output.append(output[-1] + output[-2])
+ # cut off start values
+ output = output[2:]
+ output = list(map(lambda x: x % modulo + 33, output))
+ return output
+
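+        # Derive the AES key from the subtitle id: mix the id with a magic
+        # constant via XORs and shifts, SHA-1 that number appended to a short
+        # Fibonacci-derived ASCII prefix, then zero-pad the 160-bit digest to
+        # the 256 bits AES-256 expects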
+ def obfuscate_key(key):
+ num1 = int(floor(pow(2, 25) * sqrt(6.9)))
+ num2 = (num1 ^ key) << 5
+ num3 = key ^ num1
+ num4 = num3 ^ (num3 >> 3) ^ num2
+ prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
+ shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
+            # Extend the 160-bit SHA-1 digest to the 256 bits needed for the AES key
+ return shaHash + [0] * 12
+
+ key = obfuscate_key(id)
+
+ decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
+ return zlib.decompress(decrypted_data)
+
+ def _convert_subtitles_to_srt(self, sub_root):
+ output = ''
+
+ for i, event in enumerate(sub_root.findall('./events/event'), 1):
+ start = event.attrib['start'].replace('.', ',')
+ end = event.attrib['end'].replace('.', ',')
+ text = event.attrib['text'].replace('\\N', '\n')
+ output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
+ return output
+
+ def _convert_subtitles_to_ass(self, sub_root):
+ output = ''
+
+ def ass_bool(strvalue):
+ assvalue = '0'
+ if strvalue == '1':
+ assvalue = '-1'
+ return assvalue
+
+ output = '[Script Info]\n'
+ output += 'Title: %s\n' % sub_root.attrib['title']
+ output += 'ScriptType: v4.00+\n'
+ output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
+ output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
+ output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
+ output += """
+[V4+ Styles]
+Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
+"""
+ for style in sub_root.findall('./styles/style'):
+ output += 'Style: ' + style.attrib['name']
+ output += ',' + style.attrib['font_name']
+ output += ',' + style.attrib['font_size']
+ output += ',' + style.attrib['primary_colour']
+ output += ',' + style.attrib['secondary_colour']
+ output += ',' + style.attrib['outline_colour']
+ output += ',' + style.attrib['back_colour']
+ output += ',' + ass_bool(style.attrib['bold'])
+ output += ',' + ass_bool(style.attrib['italic'])
+ output += ',' + ass_bool(style.attrib['underline'])
+ output += ',' + ass_bool(style.attrib['strikeout'])
+ output += ',' + style.attrib['scale_x']
+ output += ',' + style.attrib['scale_y']
+ output += ',' + style.attrib['spacing']
+ output += ',' + style.attrib['angle']
+ output += ',' + style.attrib['border_style']
+ output += ',' + style.attrib['outline']
+ output += ',' + style.attrib['shadow']
+ output += ',' + style.attrib['alignment']
+ output += ',' + style.attrib['margin_l']
+ output += ',' + style.attrib['margin_r']
+ output += ',' + style.attrib['margin_v']
+ output += ',' + style.attrib['encoding']
+ output += '\n'
+
+ output += """
+[Events]
+Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
+"""
+ for event in sub_root.findall('./events/event'):
+ output += 'Dialogue: 0'
+ output += ',' + event.attrib['start']
+ output += ',' + event.attrib['end']
+ output += ',' + event.attrib['style']
+ output += ',' + event.attrib['name']
+ output += ',' + event.attrib['margin_l']
+ output += ',' + event.attrib['margin_r']
+ output += ',' + event.attrib['margin_v']
+ output += ',' + event.attrib['effect']
+ output += ',' + event.attrib['text']
+ output += '\n'
+
+ return output
+
+ def _extract_subtitles(self, subtitle):
+ sub_root = compat_etree_fromstring(subtitle)
+ return [{
+ 'ext': 'srt',
+ 'data': self._convert_subtitles_to_srt(sub_root),
+ }, {
+ 'ext': 'ass',
+ 'data': self._convert_subtitles_to_ass(sub_root),
+ }]
+
+ def _get_subtitles(self, video_id, webpage):
+ subtitles = {}
+ for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
+ sub_doc = self._call_rpc_api(
+ 'Subtitle_GetXml', video_id,
+ 'Downloading subtitles for ' + sub_name, data={
+ 'subtitle_script_id': sub_id,
+ })
+ if not isinstance(sub_doc, compat_etree_Element):
+ continue
+ sid = sub_doc.get('id')
+ iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
+ data = xpath_text(sub_doc, 'data', 'subtitle data')
+ if not sid or not iv or not data:
+ continue
+ subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
+ lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
+ if not lang_code:
+ continue
+ subtitles[lang_code] = self._extract_subtitles(subtitle)
+ return subtitles
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('video_id')
+
+ if mobj.group('prefix') == 'm':
+ mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
+ webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
+ else:
+ webpage_url = 'http://www.' + mobj.group('url')
+
+ webpage = self._download_webpage(
+ self._add_skip_wall(webpage_url), video_id,
+ headers=self.geo_verification_headers())
+ note_m = self._html_search_regex(
+ r'<div class="showmedia-trailer-notice">(.+?)</div>',
+ webpage, 'trailer-notice', default='')
+ if note_m:
+ raise ExtractorError(note_m)
+
+ mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
+ if mobj:
+ msg = json.loads(mobj.group('msg'))
+ if msg.get('type') == 'error':
+ raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
+
+ if 'To view this, please log in to verify you are 18 or older.' in webpage:
+ self.raise_login_required()
+
+ media = self._parse_json(self._search_regex(
+ r'vilos\.config\.media\s*=\s*({.+?});',
+ webpage, 'vilos media', default='{}'), video_id)
+ media_metadata = media.get('metadata') or {}
+
+ language = self._search_regex(
+ r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
+ webpage, 'language', default=None, group='lang')
+
+ video_title = self._html_search_regex(
+ (r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>',
+ r'<title>(.+?),\s+-\s+.+? Crunchyroll'),
+ webpage, 'video_title', default=None)
+ if not video_title:
+ video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage))
+ video_title = re.sub(r' {2,}', ' ', video_title)
+ video_description = (self._parse_json(self._html_search_regex(
+ r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
+ webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
+ if video_description:
+ video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
+ video_uploader = self._html_search_regex(
+ # try looking for both an uploader that's a link and one that's not
+ [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
+ webpage, 'video_uploader', default=False)
+
+ formats = []
+ for stream in media.get('streams', []):
+ audio_lang = stream.get('audio_lang')
+ hardsub_lang = stream.get('hardsub_lang')
+ vrv_formats = self._extract_vrv_formats(
+ stream.get('url'), video_id, stream.get('format'),
+ audio_lang, hardsub_lang)
+ for f in vrv_formats:
+ if not hardsub_lang:
+ f['preference'] = 1
+ language_preference = 0
+ if audio_lang == language:
+ language_preference += 1
+ if hardsub_lang == language:
+ language_preference += 1
+ if language_preference:
+ f['language_preference'] = language_preference
+ formats.extend(vrv_formats)
+ if not formats:
+ available_fmts = []
+ for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
+ attrs = extract_attributes(a)
+ href = attrs.get('href')
+ if href and '/freetrial' in href:
+ continue
+ available_fmts.append(fmt)
+ if not available_fmts:
+ for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
+ available_fmts = re.findall(p, webpage)
+ if available_fmts:
+ break
+ if not available_fmts:
+ available_fmts = self._FORMAT_IDS.keys()
+ video_encode_ids = []
+
+ for fmt in available_fmts:
+ stream_quality, stream_format = self._FORMAT_IDS[fmt]
+ video_format = fmt + 'p'
+ stream_infos = []
+ streamdata = self._call_rpc_api(
+ 'VideoPlayer_GetStandardConfig', video_id,
+ 'Downloading media info for %s' % video_format, data={
+ 'media_id': video_id,
+ 'video_format': stream_format,
+ 'video_quality': stream_quality,
+ 'current_page': url,
+ })
+ if isinstance(streamdata, compat_etree_Element):
+ stream_info = streamdata.find('./{default}preload/stream_info')
+ if stream_info is not None:
+ stream_infos.append(stream_info)
+ stream_info = self._call_rpc_api(
+ 'VideoEncode_GetStreamInfo', video_id,
+ 'Downloading stream info for %s' % video_format, data={
+ 'media_id': video_id,
+ 'video_format': stream_format,
+ 'video_encode_quality': stream_quality,
+ })
+ if isinstance(stream_info, compat_etree_Element):
+ stream_infos.append(stream_info)
+ for stream_info in stream_infos:
+ video_encode_id = xpath_text(stream_info, './video_encode_id')
+ if video_encode_id in video_encode_ids:
+ continue
+ video_encode_ids.append(video_encode_id)
+
+ video_file = xpath_text(stream_info, './file')
+ if not video_file:
+ continue
+ if video_file.startswith('http'):
+ formats.extend(self._extract_m3u8_formats(
+ video_file, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+
+ video_url = xpath_text(stream_info, './host')
+ if not video_url:
+ continue
+ metadata = stream_info.find('./metadata')
+ format_info = {
+ 'format': video_format,
+ 'height': int_or_none(xpath_text(metadata, './height')),
+ 'width': int_or_none(xpath_text(metadata, './width')),
+ }
+
+ if '.fplive.net/' in video_url:
+ video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
+ parsed_video_url = compat_urlparse.urlparse(video_url)
+ direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
+ netloc='v.lvlt.crcdn.net',
+ path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
+ if self._is_valid_url(direct_video_url, video_id, video_format):
+ format_info.update({
+ 'format_id': 'http-' + video_format,
+ 'url': direct_video_url,
+ })
+ formats.append(format_info)
+ continue
+
+ format_info.update({
+ 'format_id': 'rtmp-' + video_format,
+ 'url': video_url,
+ 'play_path': video_file,
+ 'ext': 'flv',
+ })
+ formats.append(format_info)
+ self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps'))
+
+ metadata = self._call_rpc_api(
+ 'VideoPlayer_GetMediaMetadata', video_id,
+ note='Downloading media info', data={
+ 'media_id': video_id,
+ })
+
+ subtitles = {}
+ for subtitle in media.get('subtitles', []):
+ subtitle_url = subtitle.get('url')
+ if not subtitle_url:
+ continue
+ subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
+ 'url': subtitle_url,
+ 'ext': subtitle.get('format', 'ass'),
+ })
+ if not subtitles:
+ subtitles = self.extract_subtitles(video_id, webpage)
+
+        # the webpage provides more accurate data than series_title from the XML
+ series = self._html_search_regex(
+ r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
+ webpage, 'series', fatal=False)
+
+ season = episode = episode_number = duration = thumbnail = None
+
+ if isinstance(metadata, compat_etree_Element):
+ season = xpath_text(metadata, 'series_title')
+ episode = xpath_text(metadata, 'episode_title')
+ episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
+ duration = float_or_none(media_metadata.get('duration'), 1000)
+ thumbnail = xpath_text(metadata, 'episode_image_url')
+
+ if not episode:
+ episode = media_metadata.get('title')
+ if not episode_number:
+ episode_number = int_or_none(media_metadata.get('episode_number'))
+ if not thumbnail:
+ thumbnail = media_metadata.get('thumbnail', {}).get('url')
+
+ season_number = int_or_none(self._search_regex(
+ r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
+ webpage, 'season number', default=None))
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ return merge_dicts({
+ 'id': video_id,
+ 'title': video_title,
+ 'description': video_description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'uploader': video_uploader,
+ 'series': series,
+ 'season': season,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }, info)
+
+
+class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
+ IE_NAME = 'crunchyroll:playlist'
+ _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
+
+ _TESTS = [{
+ 'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
+ 'info_dict': {
+ 'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
+ 'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
+ },
+ 'playlist_count': 13,
+ }, {
+ # geo-restricted (US), 18+ maturity wall, non-premium available
+ 'url': 'http://www.crunchyroll.com/cosplay-complex-ova',
+ 'info_dict': {
+ 'id': 'cosplay-complex-ova',
+ 'title': 'Cosplay Complex OVA'
+ },
+ 'playlist_count': 3,
+ 'skip': 'Georestricted',
+ }, {
+ # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
+ 'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ self._add_skip_wall(url), show_id,
+ headers=self.geo_verification_headers())
+ title = self._html_search_meta('name', webpage, default=None)
+
+ episode_paths = re.findall(
+ r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
+ webpage)
+ entries = [
+ self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id)
+ for ep_id, ep in episode_paths
+ ]
+ entries.reverse()
+
+ return {
+ '_type': 'playlist',
+ 'id': show_id,
+ 'title': title,
+ 'entries': entries,
+ }
diff --git a/hypervideo_dl/extractor/cspan.py b/hypervideo_dl/extractor/cspan.py
new file mode 100644
index 0000000..2e01aff
--- /dev/null
+++ b/hypervideo_dl/extractor/cspan.py
@@ -0,0 +1,244 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ extract_attributes,
+ find_xpath_attr,
+ get_element_by_attribute,
+ get_element_by_class,
+ int_or_none,
+ js_to_json,
+ merge_dicts,
+ parse_iso8601,
+ smuggle_url,
+ str_to_int,
+ unescapeHTML,
+)
+from .senateisvp import SenateISVPIE
+from .ustream import UstreamIE
+
+
+class CSpanIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
+ IE_DESC = 'C-SPAN'
+ _TESTS = [{
+ 'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
+ 'md5': '94b29a4f131ff03d23471dd6f60b6a1d',
+ 'info_dict': {
+ 'id': '315139',
+ 'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
+ },
+ 'playlist_mincount': 2,
+ 'skip': 'Regularly fails on travis, for unknown reasons',
+ }, {
+ 'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
+ # md5 is unstable
+ 'info_dict': {
+ 'id': 'c4486943',
+ 'ext': 'mp4',
+ 'title': 'CSPAN - International Health Care Models',
+ 'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
+ }
+ }, {
+ 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
+ 'info_dict': {
+ 'id': '342759',
+ 'title': 'General Motors Ignition Switch Recall',
+ },
+ 'playlist_mincount': 6,
+ }, {
+ # Video from senate.gov
+ 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
+ 'info_dict': {
+ 'id': 'judiciary031715',
+ 'ext': 'mp4',
+ 'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 downloads
+ }
+ }, {
+ # Ustream embedded video
+ 'url': 'https://www.c-span.org/video/?114917-1/armed-services',
+ 'info_dict': {
+ 'id': '58428542',
+ 'ext': 'flv',
+ 'title': 'USHR07 Armed Services Committee',
+ 'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee',
+ 'timestamp': 1423060374,
+ 'upload_date': '20150204',
+ 'uploader': 'HouseCommittee',
+ 'uploader_id': '12987475',
+ },
+ }, {
+ # Audio Only
+ 'url': 'https://www.c-span.org/video/?437336-1/judiciary-antitrust-competition-policy-consumer-rights',
+ 'only_matching': True,
+ }]
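+    # Brightcove player URL layout: /<account_id>/<player_id>_<embed_id>/index.html?videoId=<video_id>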
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_type = None
+ webpage = self._download_webpage(url, video_id)
+
+ ustream_url = UstreamIE._extract_url(webpage)
+ if ustream_url:
+ return self.url_result(ustream_url, UstreamIE.ie_key())
+
+ if '&vod' not in url:
+ bc = self._search_regex(
+ r"(<[^>]+id='brightcove-player-embed'[^>]+>)",
+ webpage, 'brightcove embed', default=None)
+ if bc:
+ bc_attr = extract_attributes(bc)
+ bc_url = self.BRIGHTCOVE_URL_TEMPLATE % (
+ bc_attr.get('data-bcaccountid', '3162030207001'),
+ bc_attr.get('data-noprebcplayerid', 'SyGGpuJy3g'),
+ bc_attr.get('data-newbcplayerid', 'default'),
+ bc_attr['data-bcid'])
+ return self.url_result(smuggle_url(bc_url, {'source_url': url}))
+
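+        # attach the page URL as Referer to every format's HTTP headers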
+ def add_referer(formats):
+ for f in formats:
+ f.setdefault('http_headers', {})['Referer'] = url
+
+        # As of December 2020 this path appears to cover all cases, making the
+        # rest of the code below unnecessary
+ jwsetup = self._parse_json(
+ self._search_regex(
+ r'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage, 'jwsetup',
+ default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+ if jwsetup:
+ info = self._parse_jwplayer_data(
+ jwsetup, video_id, require_title=False, m3u8_id='hls',
+ base_url=url)
+ add_referer(info['formats'])
+ for subtitles in info['subtitles'].values():
+ for subtitle in subtitles:
+ ext = determine_ext(subtitle['url'])
+ if ext == 'php':
+ ext = 'vtt'
+ subtitle['ext'] = ext
+ ld_info = self._search_json_ld(webpage, video_id, default={})
+ title = (get_element_by_class('video-page-title', webpage)
+ or self._og_search_title(webpage))
+ description = (get_element_by_attribute('itemprop', 'description', webpage)
+ or self._html_search_meta(['og:description', 'description'], webpage))
+ return merge_dicts(info, ld_info, {
+ 'title': title,
+ 'thumbnail': get_element_by_attribute('itemprop', 'thumbnailUrl', webpage),
+ 'description': description,
+ 'timestamp': parse_iso8601(get_element_by_attribute('itemprop', 'uploadDate', webpage)),
+ 'location': get_element_by_attribute('itemprop', 'contentLocation', webpage),
+ 'duration': int_or_none(self._search_regex(
+ r'jwsetup\.seclength\s*=\s*(\d+);',
+ webpage, 'duration', fatal=False)),
+ 'view_count': str_to_int(self._search_regex(
+ r"<span[^>]+class='views'[^>]*>([\d,]+)\s+Views</span>",
+ webpage, 'views', fatal=False)),
+ })
+
+ # Obsolete
+ # Look for clipid first, because clipprog always appears before it
+ patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
+ results = list(filter(None, (re.search(p, webpage) for p in patterns)))
+ if results:
+ matches = results[0]
+ video_type, video_id = matches.groups()
+ video_type = 'clip' if video_type == 'id' else 'program'
+ else:
+ m = re.search(r'data-(?P<type>clip|prog)id=["\'](?P<id>\d+)', webpage)
+ if m:
+ video_id = m.group('id')
+ video_type = 'program' if m.group('type') == 'prog' else 'clip'
+ else:
+ senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
+ if senate_isvp_url:
+ title = self._og_search_title(webpage)
+ surl = smuggle_url(senate_isvp_url, {'force_title': title})
+ return self.url_result(surl, 'SenateISVP', video_id, title)
+ video_id = self._search_regex(
+ r'jwsetup\.clipprog\s*=\s*(\d+);',
+ webpage, 'jwsetup program id', default=None)
+ if video_id:
+ video_type = 'program'
+ if video_type is None or video_id is None:
+ error_message = get_element_by_class('VLplayer-error-message', webpage)
+ if error_message:
+ raise ExtractorError(error_message)
+ raise ExtractorError('unable to find video id and type')
+
+ def get_text_attr(d, attr):
+ return d.get(attr, {}).get('#text')
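+ # the ajax-player.php JSON wraps scalar values XML-style as {'#text': value},
+ # e.g. (hypothetical data):
+ # get_text_attr({'bitrate': {'#text': '500'}}, 'bitrate') == '500'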
+
+ data = self._download_json(
+ 'http://www.c-span.org/assets/player/ajax-player.php?os=android&html5=%s&id=%s' % (video_type, video_id),
+ video_id)['video']
+ if data['@status'] != 'Success':
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, get_text_attr(data, 'error')), expected=True)
+
+ doc = self._download_xml(
+ 'http://www.c-span.org/common/services/flashXml.php?%sid=%s' % (video_type, video_id),
+ video_id)
+
+ description = self._html_search_meta('description', webpage)
+
+ title = find_xpath_attr(doc, './/string', 'name', 'title').text
+ thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text
+
+ files = data['files']
+ capfile = get_text_attr(data, 'capfile')
+
+ entries = []
+ for partnum, f in enumerate(files):
+ formats = []
+ for quality in f.get('qualities', []):
+ formats.append({
+ 'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')),
+ 'url': unescapeHTML(get_text_attr(quality, 'file')),
+ 'height': int_or_none(get_text_attr(quality, 'height')),
+ 'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
+ })
+ if not formats:
+ path = unescapeHTML(get_text_attr(f, 'path'))
+ if not path:
+ continue
+ formats = self._extract_m3u8_formats(
+ path, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
+ add_referer(formats)
+ self._sort_formats(formats)
+ entries.append({
+ 'id': '%s_%d' % (video_id, partnum + 1),
+ 'title': (
+ title if len(files) == 1 else
+ '%s part %d' % (title, partnum + 1)),
+ 'formats': formats,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': int_or_none(get_text_attr(f, 'length')),
+ 'subtitles': {
+ 'en': [{
+ 'url': capfile,
+ 'ext': determine_ext(capfile, 'dfxp')
+ }],
+ } if capfile else None,
+ })
+
+ if len(entries) == 1:
+ entry = dict(entries[0])
+ entry['id'] = 'c' + video_id if video_type == 'clip' else video_id
+ return entry
+ else:
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'title': title,
+ 'id': 'c' + video_id if video_type == 'clip' else video_id,
+ }
diff --git a/hypervideo_dl/extractor/ctsnews.py b/hypervideo_dl/extractor/ctsnews.py
new file mode 100644
index 0000000..679f1d9
--- /dev/null
+++ b/hypervideo_dl/extractor/ctsnews.py
@@ -0,0 +1,87 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import unified_timestamp
+from .youtube import YoutubeIE
+
+
+class CtsNewsIE(InfoExtractor):
+ IE_DESC = '華視新聞'
+ _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
+ _TESTS = [{
+ 'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
+ 'md5': 'a9875cb790252b08431186d741beaabe',
+ 'info_dict': {
+ 'id': '201501291578109',
+ 'ext': 'mp4',
+ 'title': '以色列.真主黨交火 3人死亡 - 華視新聞網',
+ 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人員也不幸罹難。大陸陝西、河南、安徽、江蘇和湖北五個省份出現大暴雪,嚴重影響陸空交通,不過九華山卻出現...',
+ 'timestamp': 1422528540,
+ 'upload_date': '20150129',
+ }
+ }, {
+ # News video no longer appears on the page but is still available in the database
+ 'url': 'http://news.cts.com.tw/cts/international/201309/201309031304098.html',
+ 'md5': '3aee7e0df7cdff94e43581f54c22619e',
+ 'info_dict': {
+ 'id': '201309031304098',
+ 'ext': 'mp4',
+ 'title': '韓國31歲童顏男 貌如十多歲小孩 - 華視新聞網',
+ 'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1378205880,
+ 'upload_date': '20130903',
+ }
+ }, {
+ # With an embedded YouTube video
+ 'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
+ 'md5': 'e4726b2ccd70ba2c319865e28f0a91d1',
+ 'info_dict': {
+ 'id': 'OVbfO7d0_hQ',
+ 'ext': 'mp4',
+ 'title': 'iPhone6熱銷 蘋果財報亮眼',
+ 'description': 'md5:f395d4f485487bb0f992ed2c4b07aa7d',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20150128',
+ 'uploader_id': 'TBSCTS',
+ 'uploader': '中華電視公司',
+ },
+ 'add_ie': ['Youtube'],
+ }]
+
+ def _real_extract(self, url):
+ news_id = self._match_id(url)
+ page = self._download_webpage(url, news_id)
+
+ news_id = self._hidden_inputs(page).get('get_id')
+
+ if news_id:
+ mp4_feed = self._download_json(
+ 'http://news.cts.com.tw/action/test_mp4feed.php',
+ news_id, note='Fetching feed', query={'news_id': news_id})
+ video_url = mp4_feed['source_url']
+ else:
+ self.to_screen('Not CTSPlayer video, trying Youtube...')
+ youtube_url = YoutubeIE._extract_url(page)
+
+ return self.url_result(youtube_url, ie='Youtube')
+
+ description = self._html_search_meta('description', page)
+ title = self._html_search_meta('title', page, fatal=True)
+ thumbnail = self._html_search_meta('image', page)
+
+ datetime_str = self._html_search_regex(
+ r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time', fatal=False)
+ timestamp = None
+ if datetime_str:
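+ # datetime_str is presumably Taipei local time (UTC+8); shift back to UTC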
+ timestamp = unified_timestamp(datetime_str) - 8 * 3600
+
+ return {
+ 'id': news_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ }
diff --git a/hypervideo_dl/extractor/ctv.py b/hypervideo_dl/extractor/ctv.py
new file mode 100644
index 0000000..756bcc2
--- /dev/null
+++ b/hypervideo_dl/extractor/ctv.py
@@ -0,0 +1,52 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class CTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P<id>(?:show|movie)s/[^/]+/[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88',
+ 'info_dict': {
+ 'id': '2102249',
+ 'ext': 'flv',
+ 'title': 'Wednesday, December 23, 2020',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.',
+ 'timestamp': 1608732000,
+ 'upload_date': '20201223',
+ 'series': 'Your Morning',
+ 'season': '2020-2021',
+ 'season_number': 5,
+ 'episode_number': 88,
+ 'tags': ['Your Morning'],
+ 'categories': ['Talk Show'],
+ 'duration': 7467.126,
+ },
+ }, {
+ 'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ content = self._download_json(
+ 'https://www.ctv.ca/space-graphql/graphql', display_id, query={
+ 'query': '''{
+ resolvedPath(path: "/%s") {
+ lastSegment {
+ content {
+ ... on AxisContent {
+ axisId
+ videoPlayerDestCode
+ }
+ }
+ }
+ }
+}''' % display_id,
+ })['data']['resolvedPath']['lastSegment']['content']
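+ # expected response shape (a sketch; the values below are hypothetical):
+ # {"data": {"resolvedPath": {"lastSegment": {"content":
+ # {"axisId": 2102249, "videoPlayerDestCode": "ctv_hub"}}}}}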
+ video_id = content['axisId']
+ return self.url_result(
+ '9c9media:%s:%s' % (content['videoPlayerDestCode'], video_id),
+ 'NineCNineMedia', video_id)
diff --git a/hypervideo_dl/extractor/ctvnews.py b/hypervideo_dl/extractor/ctvnews.py
new file mode 100644
index 0000000..03f8cef
--- /dev/null
+++ b/hypervideo_dl/extractor/ctvnews.py
@@ -0,0 +1,68 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import orderedSet
+
+
+class CTVNewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
+ _TESTS = [{
+ 'url': 'http://www.ctvnews.ca/video?clipId=901995',
+ 'md5': '9b8624ba66351a23e0b6e1391971f9af',
+ 'info_dict': {
+ 'id': '901995',
+ 'ext': 'flv',
+ 'title': 'Extended: \'That person cannot be me\' Johnson says',
+ 'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
+ 'timestamp': 1467286284,
+ 'upload_date': '20160630',
+ }
+ }, {
+ 'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
+ 'info_dict': {
+ 'id': '1.2966224',
+ },
+ 'playlist_mincount': 19,
+ }, {
+ 'url': 'http://www.ctvnews.ca/video?binId=1.2876780',
+ 'info_dict': {
+ 'id': '1.2876780',
+ },
+ 'playlist_mincount': 100,
+ }, {
+ 'url': 'http://www.ctvnews.ca/1.810401',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://vancouverisland.ctvnews.ca/video?clipId=761241',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+
+ def ninecninemedia_url_result(clip_id):
+ return {
+ '_type': 'url_transparent',
+ 'id': clip_id,
+ 'url': '9c9media:ctvnews_web:%s' % clip_id,
+ 'ie_key': 'NineCNineMedia',
+ }
+
+ if page_id.isdigit():
+ return ninecninemedia_url_result(page_id)
+ else:
+ webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={
+ 'ot': 'example.AjaxPageLayout.ot',
+ 'maxItemsPerPage': 1000000,
+ })
+ entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
+ re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
+ return self.playlist_result(entries, page_id)
diff --git a/hypervideo_dl/extractor/cultureunplugged.py b/hypervideo_dl/extractor/cultureunplugged.py
new file mode 100644
index 0000000..bcdf273
--- /dev/null
+++ b/hypervideo_dl/extractor/cultureunplugged.py
@@ -0,0 +1,70 @@
+from __future__ import unicode_literals
+
+import re
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ HEADRequest,
+)
+
+
+class CultureUnpluggedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cultureunplugged\.com/documentary/watch-online/play/(?P<id>\d+)(?:/(?P<display_id>[^/]+))?'
+ _TESTS = [{
+ 'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662/The-Next--Best-West',
+ 'md5': 'ac6c093b089f7d05e79934dcb3d228fc',
+ 'info_dict': {
+ 'id': '53662',
+ 'display_id': 'The-Next--Best-West',
+ 'ext': 'mp4',
+ 'title': 'The Next, Best West',
+ 'description': 'md5:0423cd00833dea1519cf014e9d0903b1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'creator': 'Coldstream Creative',
+ 'duration': 2203,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ # Request setClientTimezone.php first to obtain the PHPSESSID cookie, which is needed to get valid JSON data in the next request
+ self._request_webpage(HEADRequest(
+ 'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id)
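+ # time.timezone is seconds west of UTC, so the expression above sends the
+ # local UTC offset in hours, e.g. -28800 (Taipei, UTC+8) -> timeOffset=8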
+ movie_data = self._download_json(
+ 'http://www.cultureunplugged.com/movie-data/cu-%s.json' % video_id, display_id)
+
+ video_url = movie_data['url']
+ title = movie_data['title']
+
+ description = movie_data.get('synopsis')
+ creator = movie_data.get('producer')
+ duration = int_or_none(movie_data.get('duration'))
+ view_count = int_or_none(movie_data.get('views'))
+
+ thumbnails = [{
+ 'url': movie_data['%s_thumb' % size],
+ 'id': size,
+ 'preference': preference,
+ } for preference, size in enumerate((
+ 'small', 'large')) if movie_data.get('%s_thumb' % size)]
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'creator': creator,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'thumbnails': thumbnails,
+ }
diff --git a/hypervideo_dl/extractor/curiositystream.py b/hypervideo_dl/extractor/curiositystream.py
new file mode 100644
index 0000000..ae64a07
--- /dev/null
+++ b/hypervideo_dl/extractor/curiositystream.py
@@ -0,0 +1,174 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ urlencode_postdata,
+ compat_str,
+ ExtractorError,
+)
+
+
+class CuriosityStreamBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'curiositystream'
+ _auth_token = None
+ _API_BASE_URL = 'https://api.curiositystream.com/v1/'
+
+ def _handle_errors(self, result):
+ error = result.get('error', {}).get('message')
+ if error:
+ if isinstance(error, dict):
+ error = ', '.join(error.values())
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ def _call_api(self, path, video_id, query=None):
+ headers = {}
+ if self._auth_token:
+ headers['X-Auth-Token'] = self._auth_token
+ result = self._download_json(
+ self._API_BASE_URL + path, video_id, headers=headers, query=query)
+ self._handle_errors(result)
+ return result['data']
+
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+ result = self._download_json(
+ self._API_BASE_URL + 'login', None, data=urlencode_postdata({
+ 'email': email,
+ 'password': password,
+ }))
+ self._handle_errors(result)
+ self._auth_token = result['message']['auth_token']
+
+
+class CuriosityStreamIE(CuriosityStreamBaseIE):
+ IE_NAME = 'curiositystream'
+ _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://app.curiositystream.com/video/2',
+ 'info_dict': {
+ 'id': '2',
+ 'ext': 'mp4',
+ 'title': 'How Did You Develop The Internet?',
+ 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ formats = []
+ for encoding_format in ('m3u8', 'mpd'):
+ media = self._call_api('media/' + video_id, video_id, query={
+ 'encodingsNew': 'true',
+ 'encodingsFormat': encoding_format,
+ })
+ for encoding in media.get('encodings', []):
+ playlist_url = encoding.get('master_playlist_url')
+ if encoding_format == 'm3u8':
+ # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
+ formats.extend(self._extract_m3u8_formats(
+ playlist_url, video_id, 'mp4',
+ m3u8_id='hls', fatal=False))
+ elif encoding_format == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ playlist_url, video_id, mpd_id='dash', fatal=False))
+ encoding_url = encoding.get('url')
+ file_url = encoding.get('file_url')
+ if not encoding_url and not file_url:
+ continue
+ f = {
+ 'width': int_or_none(encoding.get('width')),
+ 'height': int_or_none(encoding.get('height')),
+ 'vbr': int_or_none(encoding.get('video_bitrate')),
+ 'abr': int_or_none(encoding.get('audio_bitrate')),
+ 'filesize': int_or_none(encoding.get('size_in_bytes')),
+ 'vcodec': encoding.get('video_codec'),
+ 'acodec': encoding.get('audio_codec'),
+ 'container': encoding.get('container_type'),
+ }
+ for f_url in (encoding_url, file_url):
+ if not f_url:
+ continue
+ fmt = f.copy()
+ rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
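+ # e.g. (hypothetical URL) 'rtmpe://cdn.example.com/ondemand/mp4:clips/2.mp4'
+ # -> url 'rtmpe://cdn.example.com/ondemand', app 'ondemand',
+ # playpath 'mp4:clips/2.mp4'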
+ if rtmp:
+ fmt.update({
+ 'url': rtmp.group('url'),
+ 'play_path': rtmp.group('playpath'),
+ 'app': rtmp.group('app'),
+ 'ext': 'flv',
+ 'format_id': 'rtmp',
+ })
+ else:
+ fmt.update({
+ 'url': f_url,
+ 'format_id': 'http',
+ })
+ formats.append(fmt)
+ self._sort_formats(formats)
+
+ title = media['title']
+
+ subtitles = {}
+ for closed_caption in media.get('closed_captions', []):
+ sub_url = closed_caption.get('file')
+ if not sub_url:
+ continue
+ lang = closed_caption.get('code') or closed_caption.get('language') or 'en'
+ subtitles.setdefault(lang, []).append({
+ 'url': sub_url,
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': media.get('description'),
+ 'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
+ 'duration': int_or_none(media.get('duration')),
+ 'tags': media.get('tags'),
+ 'subtitles': subtitles,
+ }
+
+
+class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
+ IE_NAME = 'curiositystream:collection'
+ _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://app.curiositystream.com/collection/2',
+ 'info_dict': {
+ 'id': '2',
+ 'title': 'Curious Minds: The Internet',
+ 'description': 'How is the internet shaping our lives in the 21st Century?',
+ },
+ 'playlist_mincount': 16,
+ }, {
+ 'url': 'https://curiositystream.com/series/2',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ collection_id = self._match_id(url)
+ collection = self._call_api(
+ 'collections/' + collection_id, collection_id)
+ entries = []
+ for media in collection.get('media', []):
+ media_id = compat_str(media.get('id'))
+ entries.append(self.url_result(
+ 'https://curiositystream.com/video/' + media_id,
+ CuriosityStreamIE.ie_key(), media_id))
+ return self.playlist_result(
+ entries, collection_id,
+ collection.get('title'), collection.get('description'))
diff --git a/hypervideo_dl/extractor/cwtv.py b/hypervideo_dl/extractor/cwtv.py
new file mode 100644
index 0000000..7338243
--- /dev/null
+++ b/hypervideo_dl/extractor/cwtv.py
@@ -0,0 +1,97 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+ parse_iso8601,
+ smuggle_url,
+ str_or_none,
+)
+
+
+class CWTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
+ _TESTS = [{
+ 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
+ 'info_dict': {
+ 'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
+ 'ext': 'mp4',
+ 'title': 'Legends of Yesterday',
+ 'description': 'Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote location to keep them hidden from Vandal Savage while they figure out how to defeat him.',
+ 'duration': 2665,
+ 'series': 'Arrow',
+ 'season_number': 4,
+ 'season': '4',
+ 'episode_number': 8,
+ 'upload_date': '20151203',
+ 'timestamp': 1449122100,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'redirect to http://cwtv.com/shows/arrow/',
+ }, {
+ 'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088',
+ 'info_dict': {
+ 'id': '24282b12-ead2-42f2-95ad-26770c2c6088',
+ 'ext': 'mp4',
+ 'title': 'Jeff Davis 4',
+ 'description': 'Jeff Davis is back to make you laugh.',
+ 'duration': 1263,
+ 'series': 'Whose Line Is It Anyway?',
+ 'season_number': 11,
+ 'episode_number': 20,
+ 'upload_date': '20151006',
+ 'timestamp': 1444107300,
+ 'age_limit': 14,
+ 'uploader': 'CWTV',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cwtvpr.com/the-cw/video?watch=9eee3f60-ef4e-440b-b3b2-49428ac9c54e',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._download_json(
+ 'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
+ video_id)
+ if data.get('result') != 'ok':
+ raise ExtractorError(data['msg'], expected=True)
+ video_data = data['video']
+ title = video_data['title']
+ mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
+
+ season = str_or_none(video_data.get('season'))
+ episode = str_or_none(video_data.get('episode'))
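+ # the API's episode field apparently prefixes the episode number with the
+ # season number, e.g. season '4' and episode '408' -> episode '08'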
+ if episode and season:
+ episode = episode[len(season):]
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': title,
+ 'url': smuggle_url(mpx_url, {'force_smil_url': True}),
+ 'description': video_data.get('description_long'),
+ 'duration': int_or_none(video_data.get('duration_secs')),
+ 'series': video_data.get('series_name'),
+ 'season_number': int_or_none(season),
+ 'episode_number': int_or_none(episode),
+ 'timestamp': parse_iso8601(video_data.get('start_time')),
+ 'age_limit': parse_age_limit(video_data.get('rating')),
+ 'ie_key': 'ThePlatform',
+ }
diff --git a/hypervideo_dl/extractor/dailymail.py b/hypervideo_dl/extractor/dailymail.py
new file mode 100644
index 0000000..67b88fd
--- /dev/null
+++ b/hypervideo_dl/extractor/dailymail.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ determine_protocol,
+ try_get,
+ unescapeHTML,
+)
+
+
+class DailyMailIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/(?:video/[^/]+/video-|embed/video/)(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
+ 'md5': 'f6129624562251f628296c3a9ffde124',
+ 'info_dict': {
+ 'id': '1295863',
+ 'ext': 'mp4',
+ 'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
+ 'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
+ }
+ }, {
+ 'url': 'http://www.dailymail.co.uk/embed/video/1295863.html',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ video_data = self._parse_json(self._search_regex(
+ r"data-opts='({.+?})'", webpage, 'video data'), video_id)
+ title = unescapeHTML(video_data['title'])
+
+ sources_url = (try_get(
+ video_data,
+ (lambda x: x['plugins']['sources']['url'],
+ lambda x: x['sources']['url']), compat_str)
+ or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
+
+ video_sources = self._download_json(sources_url, video_id)
+ body = video_sources.get('body')
+ if body:
+ video_sources = body
+
+ formats = []
+ for rendition in video_sources['renditions']:
+ rendition_url = rendition.get('url')
+ if not rendition_url:
+ continue
+ tbr = int_or_none(rendition.get('encodingRate'), 1000)
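+ # encodingRate is in bit/s; int_or_none's scale argument divides, so e.g.
+ # a hypothetical 1200000 bit/s rendition yields tbr == 1200 (kbit/s)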
+ container = rendition.get('videoContainer')
+ is_hls = container == 'M2TS'
+ protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url})
+ formats.append({
+ 'format_id': ('hls' if is_hls else protocol) + ('-%d' % tbr if tbr else ''),
+ 'url': rendition_url,
+ 'width': int_or_none(rendition.get('frameWidth')),
+ 'height': int_or_none(rendition.get('frameHeight')),
+ 'tbr': tbr,
+ 'vcodec': rendition.get('videoCodec'),
+ 'container': container,
+ 'protocol': protocol,
+ 'ext': 'mp4' if is_hls else None,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': unescapeHTML(video_data.get('descr')),
+ 'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/dailymotion.py b/hypervideo_dl/extractor/dailymotion.py
new file mode 100644
index 0000000..b852905
--- /dev/null
+++ b/hypervideo_dl/extractor/dailymotion.py
@@ -0,0 +1,393 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import functools
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ age_restricted,
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ OnDemandPagedList,
+ try_get,
+ unescapeHTML,
+ urlencode_postdata,
+)
+
+
+class DailymotionBaseInfoExtractor(InfoExtractor):
+ _FAMILY_FILTER = None
+ _HEADERS = {
+ 'Content-Type': 'application/json',
+ 'Origin': 'https://www.dailymotion.com',
+ }
+ _NETRC_MACHINE = 'dailymotion'
+
+ def _get_dailymotion_cookies(self):
+ return self._get_cookies('https://www.dailymotion.com/')
+
+ @staticmethod
+ def _get_cookie_value(cookies, name):
+ cookie = cookies.get(name)
+ if cookie:
+ return cookie.value
+
+ def _set_dailymotion_cookie(self, name, value):
+ self._set_cookie('www.dailymotion.com', name, value)
+
+ def _real_initialize(self):
+ cookies = self._get_dailymotion_cookies()
+ ff = self._get_cookie_value(cookies, 'ff')
+ self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self._downloader.params.get('age_limit'))
+ self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')
+
+ def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
+ if not self._HEADERS.get('Authorization'):
+ cookies = self._get_dailymotion_cookies()
+ token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
+ if not token:
+ data = {
+ 'client_id': 'f1a362d288c1b98099c7',
+ 'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
+ }
+ username, password = self._get_login_info()
+ if username:
+ data.update({
+ 'grant_type': 'password',
+ 'password': password,
+ 'username': username,
+ })
+ else:
+ data['grant_type'] = 'client_credentials'
+ try:
+ token = self._download_json(
+ 'https://graphql.api.dailymotion.com/oauth/token',
+ None, 'Downloading Access Token',
+ data=urlencode_postdata(data))['access_token']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ raise ExtractorError(self._parse_json(
+ e.cause.read().decode(), xid)['error_description'], expected=True)
+ raise
+ self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
+ self._HEADERS['Authorization'] = 'Bearer ' + token
+
+ resp = self._download_json(
+ 'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
+ 'query': '''{
+ %s(xid: "%s"%s) {
+ %s
+ }
+}''' % (object_type, xid, ', ' + filter_extra if filter_extra else '', object_fields),
+ }).encode(), headers=self._HEADERS)
+ obj = resp['data'][object_type]
+ if not obj:
+ raise ExtractorError(resp['errors'][0]['message'], expected=True)
+ return obj
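+ # e.g. (hypothetical xid) _call_api('media', 'x7abcde', 'xid', note) posts a
+ # GraphQL document equivalent to: { media(xid: "x7abcde") { xid } }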
+
+
+class DailymotionIE(DailymotionBaseInfoExtractor):
+ _VALID_URL = r'''(?ix)
+ https?://
+ (?:
+ (?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)|
+ (?:www\.)?lequipe\.fr/video
+ )
+ /(?P<id>[^/?_]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
+ '''
+ IE_NAME = 'dailymotion'
+ _TESTS = [{
+ 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
+ 'md5': '074b95bdee76b9e3654137aee9c79dfe',
+ 'info_dict': {
+ 'id': 'x5kesuj',
+ 'ext': 'mp4',
+ 'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
+ 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
+ 'duration': 187,
+ 'timestamp': 1493651285,
+ 'upload_date': '20170501',
+ 'uploader': 'Deadline',
+ 'uploader_id': 'x1xm8ri',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
+ 'md5': '2137c41a8e78554bb09225b8eb322406',
+ 'info_dict': {
+ 'id': 'x2iuewm',
+ 'ext': 'mp4',
+ 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
+ 'description': 'Several come bundled with the Steam Controller.',
+ 'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
+ 'duration': 74,
+ 'timestamp': 1425657362,
+ 'upload_date': '20150306',
+ 'uploader': 'IGN',
+ 'uploader_id': 'xijv66',
+ 'age_limit': 0,
+ 'view_count': int,
+ },
+ 'skip': 'video gone',
+ }, {
+ # Vevo video
+ 'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
+ 'info_dict': {
+ 'title': 'Roar (Official)',
+ 'id': 'USUV71301934',
+ 'ext': 'mp4',
+ 'uploader': 'Katy Perry',
+ 'upload_date': '20130905',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'VEVO is only available in some countries',
+ }, {
+ # age-restricted video
+ 'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
+ 'md5': '0d667a7b9cebecc3c89ee93099c4159d',
+ 'info_dict': {
+ 'id': 'xyh2zz',
+ 'ext': 'mp4',
+ 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
+ 'uploader': 'HotWaves1012',
+ 'age_limit': 18,
+ },
+ 'skip': 'video gone',
+ }, {
+ # geo-restricted, player v5
+ 'url': 'http://www.dailymotion.com/video/xhza0o',
+ 'only_matching': True,
+ }, {
+ # with subtitles
+ 'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.lequipe.fr/video/x791mem',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dailymotion.com/video/x3z49k?playlist=xv4bw',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+ _COMMON_MEDIA_FIELDS = '''description
+ geoblockedCountries {
+ allowed
+ }
+ xid'''
+
+ @staticmethod
+ def _extract_urls(webpage):
+ urls = []
+ # Look for embedded Dailymotion player
+ # https://developer.dailymotion.com/player#player-parameters
+ for mobj in re.finditer(
+ r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage):
+ urls.append(unescapeHTML(mobj.group('url')))
+ for mobj in re.finditer(
+ r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
+ urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id'))
+ return urls
+
+ def _real_extract(self, url):
+ video_id, playlist_id = re.match(self._VALID_URL, url).groups()
+
+ if playlist_id:
+ if not self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
+ return self.url_result(
+ 'http://www.dailymotion.com/playlist/' + playlist_id,
+ 'DailymotionPlaylist', playlist_id)
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
+ password = self._downloader.params.get('videopassword')
+ media = self._call_api(
+ 'media', video_id, '''... on Video {
+ %s
+ stats {
+ likes {
+ total
+ }
+ views {
+ total
+ }
+ }
+ }
+ ... on Live {
+ %s
+ audienceCount
+ isOnAir
+ }''' % (self._COMMON_MEDIA_FIELDS, self._COMMON_MEDIA_FIELDS), 'Downloading media JSON metadata',
+ 'password: "%s"' % self._downloader.params.get('videopassword') if password else None)
+ xid = media['xid']
+
+ metadata = self._download_json(
+ 'https://www.dailymotion.com/player/metadata/video/' + xid,
+ xid, 'Downloading metadata JSON',
+ query={'app': 'com.dailymotion.neon'})
+
+ error = metadata.get('error')
+ if error:
+ title = error.get('title') or error['raw_message']
+ # See https://developer.dailymotion.com/api#access-error
+ if error.get('code') == 'DM007':
+ allowed_countries = try_get(media, lambda x: x['geoblockedCountries']['allowed'], list)
+ self.raise_geo_restricted(msg=title, countries=allowed_countries)
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, title), expected=True)
+
+ title = metadata['title']
+ is_live = media.get('isOnAir')
+ formats = []
+ for quality, media_list in metadata['qualities'].items():
+ for m in media_list:
+ media_url = m.get('url')
+ media_type = m.get('type')
+ if not media_url or media_type == 'application/vnd.lumberjack.manifest':
+ continue
+ if media_type == 'application/x-mpegURL':
+ formats.extend(self._extract_m3u8_formats(
+ media_url, video_id, 'mp4',
+ 'm3u8' if is_live else 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ f = {
+ 'url': media_url,
+ 'format_id': 'http-' + quality,
+ }
+ m = re.search(r'/H264-(\d+)x(\d+)(?:-(60)/)?', media_url)
+ if m:
+ width, height, fps = map(int_or_none, m.groups())
+ f.update({
+ 'fps': fps,
+ 'height': height,
+ 'width': width,
+ })
+ formats.append(f)
+ for f in formats:
+ f['url'] = f['url'].split('#')[0]
+ if not f.get('fps') and f['format_id'].endswith('@60'):
+ f['fps'] = 60
+ self._sort_formats(formats)
+
+ subtitles = {}
+ subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
+ for subtitle_lang, subtitle in subtitles_data.items():
+ subtitles[subtitle_lang] = [{
+ 'url': subtitle_url,
+ } for subtitle_url in subtitle.get('urls', [])]
+
+ thumbnails = []
+ for height, poster_url in metadata.get('posters', {}).items():
+ thumbnails.append({
+ 'height': int_or_none(height),
+ 'id': height,
+ 'url': poster_url,
+ })
+
+ owner = metadata.get('owner') or {}
+ stats = media.get('stats') or {}
+ get_count = lambda x: int_or_none(try_get(stats, lambda y: y[x + 's']['total']))
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': clean_html(media.get('description')),
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(metadata.get('duration')) or None,
+ 'timestamp': int_or_none(metadata.get('created_time')),
+ 'uploader': owner.get('screenname'),
+ 'uploader_id': owner.get('id') or metadata.get('screenname'),
+ 'age_limit': 18 if metadata.get('explicit') else 0,
+ 'tags': metadata.get('tags'),
+ 'view_count': get_count('view') or int_or_none(media.get('audienceCount')),
+ 'like_count': get_count('like'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ }
+
+
+class DailymotionPlaylistBaseIE(DailymotionBaseInfoExtractor):
+ _PAGE_SIZE = 100
+
+ def _fetch_page(self, playlist_id, page):
+ page += 1
+ videos = self._call_api(
+ self._OBJECT_TYPE, playlist_id,
+ '''videos(allowExplicit: %s, first: %d, page: %d) {
+ edges {
+ node {
+ xid
+ url
+ }
+ }
+ }''' % ('false' if self._FAMILY_FILTER else 'true', self._PAGE_SIZE, page),
+ 'Downloading page %d' % page)['videos']
+ for edge in videos['edges']:
+ node = edge['node']
+ yield self.url_result(
+ node['url'], DailymotionIE.ie_key(), node['xid'])
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, playlist_id), self._PAGE_SIZE)
+ return self.playlist_result(
+ entries, playlist_id)
+
+
+class DailymotionPlaylistIE(DailymotionPlaylistBaseIE):
+ IE_NAME = 'dailymotion:playlist'
+ _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
+ _TESTS = [{
+ 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
+ 'info_dict': {
+ 'id': 'xv4bw',
+ },
+ 'playlist_mincount': 20,
+ }]
+ _OBJECT_TYPE = 'collection'
+
+
+class DailymotionUserIE(DailymotionPlaylistBaseIE):
+ IE_NAME = 'dailymotion:user'
+ _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.dailymotion.com/user/nqtv',
+ 'info_dict': {
+ 'id': 'nqtv',
+ },
+ 'playlist_mincount': 152,
+ }, {
+ 'url': 'http://www.dailymotion.com/user/UnderProject',
+ 'info_dict': {
+ 'id': 'UnderProject',
+ },
+ 'playlist_mincount': 1000,
+ 'skip': 'Takes too long',
+ }, {
+ 'url': 'https://www.dailymotion.com/user/nqtv',
+ 'info_dict': {
+ 'id': 'nqtv',
+ },
+ 'playlist_mincount': 148,
+ 'params': {
+ 'age_limit': 0,
+ },
+ }]
+ _OBJECT_TYPE = 'channel'
diff --git a/hypervideo_dl/extractor/daum.py b/hypervideo_dl/extractor/daum.py
new file mode 100644
index 0000000..1370955
--- /dev/null
+++ b/hypervideo_dl/extractor/daum.py
@@ -0,0 +1,266 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_unquote,
+ compat_urlparse,
+)
+
+
+class DaumBaseIE(InfoExtractor):
+ _KAKAO_EMBED_BASE = 'http://tv.kakao.com/embed/player/cliplink/'
+
+
+class DaumIE(DaumBaseIE):
+ _VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
+ IE_NAME = 'daum.net'
+
+ _TESTS = [{
+ 'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
+ 'info_dict': {
+ 'id': 'vab4dyeDBysyBssyukBUjBz',
+ 'ext': 'mp4',
+ 'title': '마크 헌트 vs 안토니오 실바',
+ 'description': 'Mark Hunt vs Antonio Silva',
+ 'upload_date': '20131217',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'duration': 2117,
+ 'view_count': int,
+ 'comment_count': int,
+ 'uploader_id': 186139,
+ 'uploader': '콘간지',
+ 'timestamp': 1387310323,
+ },
+ }, {
+ 'url': 'http://m.tvpot.daum.net/v/65139429',
+ 'info_dict': {
+ 'id': '65139429',
+ 'ext': 'mp4',
+ 'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118',
+ 'description': 'md5:79794514261164ff27e36a21ad229fc5',
+ 'upload_date': '20150118',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'duration': 154,
+ 'view_count': int,
+ 'comment_count': int,
+ 'uploader': 'MBC 예능',
+ 'uploader_id': 132251,
+ 'timestamp': 1421604228,
+ },
+ }, {
+ 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videofarm.daum.net/controller/player/VodPlayer.swf?vid=vwIpVpCQsT8%24&ref=',
+ 'info_dict': {
+ 'id': 'vwIpVpCQsT8$',
+ 'ext': 'flv',
+ 'title': '01-Korean War ( Trouble on the horizon )',
+ 'description': 'Korean War 01\r\nTrouble on the horizon\r\n전쟁의 먹구름',
+ 'upload_date': '20080223',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'duration': 249,
+ 'view_count': int,
+ 'comment_count': int,
+ 'uploader': '까칠한 墮落始祖 황비홍님의',
+ 'uploader_id': 560824,
+ 'timestamp': 1203770745,
+ },
+ }, {
+ # Requires dte_type=WEB (#9972)
+ 'url': 'http://tvpot.daum.net/v/s3794Uf1NZeZ1qMpGpeqeRU',
+ 'md5': 'a8917742069a4dd442516b86e7d66529',
+ 'info_dict': {
+ 'id': 's3794Uf1NZeZ1qMpGpeqeRU',
+ 'ext': 'mp4',
+ 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
+ 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
+ 'upload_date': '20170129',
+ 'uploader': '쇼! 음악중심',
+ 'uploader_id': 2653210,
+ 'timestamp': 1485684628,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = compat_urllib_parse_unquote(self._match_id(url))
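+ # non-numeric ids need an '@my' suffix on the Kakao embed side, e.g.
+ # 'vab4dyeDBysyBssyukBUjBz' -> 'vab4dyeDBysyBssyukBUjBz@my'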
+ if not video_id.isdigit():
+ video_id += '@my'
+ return self.url_result(
+ self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
+
+
+class DaumClipIE(DaumBaseIE):
+ _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
+ IE_NAME = 'daum.net:clip'
+ _URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s'
+
+ _TESTS = [{
+ 'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
+ 'info_dict': {
+ 'id': '52554690',
+ 'ext': 'mp4',
+ 'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
+ 'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
+ 'upload_date': '20130831',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'duration': 3868,
+ 'view_count': int,
+ 'uploader': 'GOMeXP',
+ 'uploader_id': 6667,
+ 'timestamp': 1377911092,
+ },
+ }, {
+ 'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if DaumPlaylistIE.suitable(url) or DaumUserIE.suitable(url) else super(DaumClipIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
+
+
+class DaumListIE(InfoExtractor):
+ def _get_entries(self, list_id, list_id_type):
+ name = None
+ entries = []
+ for pagenum in itertools.count(1):
+ list_info = self._download_json(
+ 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % (
+ pagenum, list_id_type, list_id), list_id, 'Downloading list info - %s' % pagenum)
+
+ entries.extend([
+ self.url_result(
+ 'http://tvpot.daum.net/v/%s' % clip['vid'])
+ for clip in list_info['clip_list']
+ ])
+
+ if not name:
+ name = list_info.get('playlist_bean', {}).get('name') or \
+ list_info.get('potInfo', {}).get('name')
+
+ if not list_info.get('has_more'):
+ break
+
+ return name, entries
+
+ def _check_clip(self, url, list_id):
+ query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query)
+ if 'clipid' in query_dict:
+ clip_id = query_dict['clipid'][0]
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % clip_id)
+ return self.url_result(DaumClipIE._URL_TEMPLATE % clip_id, 'DaumClip')
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id)
+
+
+class DaumPlaylistIE(DaumListIE):
+ _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View\.do|Top\.tv)\?.*?playlistid=(?P<id>[0-9]+)'
+ IE_NAME = 'daum.net:playlist'
+ _URL_TEMPLATE = 'http://tvpot.daum.net/mypot/View.do?playlistid=%s'
+
+ _TESTS = [{
+ 'note': 'Playlist url with clipid',
+ 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
+ 'info_dict': {
+ 'id': '6213966',
+ 'title': 'Woorissica Official',
+ },
+ 'playlist_mincount': 181
+ }, {
+ 'note': 'Playlist url with clipid - noplaylist',
+ 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
+ 'info_dict': {
+ 'id': '73806844',
+ 'ext': 'mp4',
+ 'title': '151017 Airport',
+ 'upload_date': '20160117',
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ }
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if DaumUserIE.suitable(url) else super(DaumPlaylistIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ clip_result = self._check_clip(url, list_id)
+ if clip_result:
+ return clip_result
+
+ name, entries = self._get_entries(list_id, 'playlistid')
+
+ return self.playlist_result(entries, list_id, name)
+
+
+class DaumUserIE(DaumListIE):
+ _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View|Top)\.(?:do|tv)\?.*?ownerid=(?P<id>[0-9a-zA-Z]+)'
+ IE_NAME = 'daum.net:user'
+
+ _TESTS = [{
+ 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0',
+ 'info_dict': {
+ 'id': 'o2scDLIVbHc0',
+ 'title': '마이 리틀 텔레비전',
+ },
+ 'playlist_mincount': 213
+ }, {
+ 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&clipid=73801156',
+ 'info_dict': {
+ 'id': '73801156',
+ 'ext': 'mp4',
+ 'title': '[미공개] 김구라, 오만석이 부릅니다 \'오케피\' - 마이 리틀 텔레비전 20160116',
+ 'upload_date': '20160117',
+ 'description': 'md5:5e91d2d6747f53575badd24bd62b9f36'
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ }
+ }, {
+ 'note': 'Playlist url has ownerid and playlistid, playlistid takes precedence',
+ 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&playlistid=6196631',
+ 'info_dict': {
+ 'id': '6196631',
+ 'title': '마이 리틀 텔레비전 - 20160109',
+ },
+ 'playlist_count': 11
+ }, {
+ 'url': 'http://tvpot.daum.net/mypot/Top.do?ownerid=o2scDLIVbHc0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.tvpot.daum.net/mypot/Top.tv?ownerid=45x1okb1If50&playlistid=3569733',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ clip_result = self._check_clip(url, list_id)
+ if clip_result:
+ return clip_result
+
+ query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query)
+ if 'playlistid' in query_dict:
+ playlist_id = query_dict['playlistid'][0]
+ return self.url_result(DaumPlaylistIE._URL_TEMPLATE % playlist_id, 'DaumPlaylist')
+
+ name, entries = self._get_entries(list_id, 'ownerid')
+
+ return self.playlist_result(entries, list_id, name)
diff --git a/hypervideo_dl/extractor/dbtv.py b/hypervideo_dl/extractor/dbtv.py
new file mode 100644
index 0000000..aaedf2e
--- /dev/null
+++ b/hypervideo_dl/extractor/dbtv.py
@@ -0,0 +1,57 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class DBTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})'
+ _TESTS = [{
+ 'url': 'https://www.dagbladet.no/video/PynxJnNWChE/',
+ 'md5': 'b8f850ba1860adbda668d367f9b77699',
+ 'info_dict': {
+ 'id': 'PynxJnNWChE',
+ 'ext': 'mp4',
+ 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
+ 'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'upload_date': '20160916',
+ 'duration': 69,
+ 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ',
+ 'uploader': 'Dagbladet',
+ },
+ 'add_ie': ['Youtube']
+ }, {
+ 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [url for _, url in re.findall(
+ r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ display_id, video_id = re.match(self._VALID_URL, url).groups()
+ info = {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'display_id': display_id,
+ }
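+ # an 11-character id (e.g. 'PynxJnNWChE' above) is a YouTube video id;
+ # an 8-character id is a JWPlatform media id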
+ if len(video_id) == 11:
+ info.update({
+ 'url': video_id,
+ 'ie_key': 'Youtube',
+ })
+ else:
+ info.update({
+ 'url': 'jwplatform:' + video_id,
+ 'ie_key': 'JWPlatform',
+ })
+ return info
diff --git a/hypervideo_dl/extractor/dctp.py b/hypervideo_dl/extractor/dctp.py
new file mode 100644
index 0000000..e700f8d
--- /dev/null
+++ b/hypervideo_dl/extractor/dctp.py
@@ -0,0 +1,105 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class DctpTvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # 4x3
+ 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
+ 'md5': '3ffbd1556c3fe210724d7088fad723e3',
+ 'info_dict': {
+ 'id': '95eaa4f33dad413aa17b4ee613cccc6c',
+ 'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
+ 'ext': 'm4v',
+ 'title': 'Videoinstallation für eine Kaufhausfassade',
+ 'description': 'Kurzfilm',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 71.24,
+ 'timestamp': 1302172322,
+ 'upload_date': '20110407',
+ },
+ }, {
+ # 16x9
+ 'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
+ 'only_matching': True,
+ }]
+
+ _BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ version = self._download_json(
+ '%s/version.json' % self._BASE_URL, display_id,
+ 'Downloading version JSON')
+
+ restapi_base = '%s/%s/restapi' % (
+ self._BASE_URL, version['version_name'])
+
+ info = self._download_json(
+ '%s/slugs/%s.json' % (restapi_base, display_id), display_id,
+ 'Downloading video info JSON')
+
+ media = self._download_json(
+ '%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
+ display_id, 'Downloading media JSON')
+
+ uuid = media['uuid']
+ title = media['title']
+ is_wide = media.get('is_wide')
+ formats = []
+
+ def add_formats(suffix):
+ templ = 'https://%%s/%s_dctp_%s.m4v' % (uuid, suffix)
+ formats.extend([{
+ 'format_id': 'hls-' + suffix,
+ 'url': (templ % 'cdn-segments.dctp.tv') + '/playlist.m3u8',
+ 'protocol': 'm3u8_native',
+ }, {
+ 'format_id': 's3-' + suffix,
+ 'url': templ % 'completed-media.s3.amazonaws.com',
+ }, {
+ 'format_id': 'http-' + suffix,
+ 'url': templ % 'cdn-media.dctp.tv',
+ }])
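+ # e.g. add_formats('0500_16x9') yields, for a hypothetical uuid, URLs like
+ # https://cdn-segments.dctp.tv/<uuid>_dctp_0500_16x9.m4v/playlist.m3u8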
+
+ add_formats('0500_' + ('16x9' if is_wide else '4x3'))
+ if is_wide:
+ add_formats('720p')
+
+ thumbnails = []
+ images = media.get('images')
+ if isinstance(images, list):
+ for image in images:
+ if not isinstance(image, dict):
+ continue
+ image_url = url_or_none(image.get('url'))
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ return {
+ 'id': uuid,
+ 'display_id': display_id,
+ 'title': title,
+ 'alt_title': media.get('subtitle'),
+ 'description': media.get('description') or media.get('teaser'),
+ 'timestamp': unified_timestamp(media.get('created')),
+ 'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/deezer.py b/hypervideo_dl/extractor/deezer.py
new file mode 100644
index 0000000..a38b268
--- /dev/null
+++ b/hypervideo_dl/extractor/deezer.py
@@ -0,0 +1,91 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ orderedSet,
+)
+
+
+class DeezerPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?deezer\.com/playlist/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.deezer.com/playlist/176747451',
+ 'info_dict': {
+ 'id': '176747451',
+ 'title': 'Best!',
+ 'uploader': 'Anonymous',
+ 'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$',
+ },
+ 'playlist_count': 30,
+ 'skip': 'Only available in .de',
+ }
+
+ def _real_extract(self, url):
+ if 'test' not in self._downloader.params:
+ self._downloader.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!')
+
+ mobj = re.match(self._VALID_URL, url)
+ playlist_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, playlist_id)
+ geoblocking_msg = self._html_search_regex(
+ r'<p class="soon-txt">(.*?)</p>', webpage, 'geoblocking message',
+ default=None)
+ if geoblocking_msg is not None:
+ raise ExtractorError(
+ 'Deezer said: %s' % geoblocking_msg, expected=True)
+
+ data_json = self._search_regex(
+ (r'__DZR_APP_STATE__\s*=\s*({.+?})\s*</script>',
+ r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n'),
+ webpage, 'data JSON')
+ data = json.loads(data_json)
+
+ playlist_title = data.get('DATA', {}).get('TITLE')
+ playlist_uploader = data.get('DATA', {}).get('PARENT_USERNAME')
+ playlist_thumbnail = self._search_regex(
+ r'<img id="naboo_playlist_image".*?src="([^"]+)"', webpage,
+ 'playlist thumbnail')
+
+ preview_pattern = self._search_regex(
+ r"var SOUND_PREVIEW_GATEWAY\s*=\s*'([^']+)';", webpage,
+ 'preview URL pattern', fatal=False)
+ entries = []
+ for s in data['SONGS']['data']:
+ puid = s['MD5_ORIGIN']
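+ # SOUND_PREVIEW_GATEWAY is a URL template with {0}/{1}/{2} placeholders:
+ # {0} = first hex digit of MD5_ORIGIN, {1} = MD5_ORIGIN, {2} = MEDIA_VERSION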
+ preview_video_url = (preview_pattern
+ .replace('{0}', puid[0])
+ .replace('{1}', puid)
+ .replace('{2}', s['MEDIA_VERSION']))
+ formats = [{
+ 'format_id': 'preview',
+ 'url': preview_video_url,
+ 'preference': -100, # Only the first 30 seconds
+ 'ext': 'mp3',
+ }]
+ self._sort_formats(formats)
+ artists = ', '.join(
+ orderedSet(a['ART_NAME'] for a in s['ARTISTS']))
+ entries.append({
+ 'id': s['SNG_ID'],
+ 'duration': int_or_none(s.get('DURATION')),
+ 'title': '%s - %s' % (artists, s['SNG_TITLE']),
+ 'uploader': s['ART_NAME'],
+ 'uploader_id': s['ART_ID'],
+ 'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0,
+ 'formats': formats,
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': playlist_title,
+ 'uploader': playlist_uploader,
+ 'thumbnail': playlist_thumbnail,
+ 'entries': entries,
+ }
diff --git a/hypervideo_dl/extractor/defense.py b/hypervideo_dl/extractor/defense.py
new file mode 100644
index 0000000..9fe144e
--- /dev/null
+++ b/hypervideo_dl/extractor/defense.py
@@ -0,0 +1,39 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class DefenseGouvFrIE(InfoExtractor):
+ IE_NAME = 'defense.gouv.fr'
+ _VALID_URL = r'https?://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
+
+ _TEST = {
+ 'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
+ 'md5': '75bba6124da7e63d2d60b5244ec9430c',
+ 'info_dict': {
+ 'id': '11213',
+ 'ext': 'mp4',
+ 'title': 'attaque-chimique-syrienne-du-21-aout-2013-1'
+ }
+ }
+
+ def _real_extract(self, url):
+ title = self._match_id(url)
+ webpage = self._download_webpage(url, title)
+
+ video_id = self._search_regex(
+ r'flashvars\.pvg_id="(\d+)";',
+ webpage, 'ID')
+
+ json_url = (
+ 'http://static.videos.gouv.fr/brightcovehub/export/json/%s' %
+ video_id)
+ info = self._download_json(json_url, title, 'Downloading JSON config')
+ video_url = info['renditions'][0]['url']
+
+ return {
+ 'id': video_id,
+ 'ext': 'mp4',
+ 'url': video_url,
+ 'title': title,
+ }
diff --git a/hypervideo_dl/extractor/democracynow.py b/hypervideo_dl/extractor/democracynow.py
new file mode 100644
index 0000000..5c9c0ec
--- /dev/null
+++ b/hypervideo_dl/extractor/democracynow.py
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import os.path
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ url_basename,
+ remove_start,
+)
+
+
+class DemocracynowIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P<id>[^\?]*)'
+ IE_NAME = 'democracynow'
+ _TESTS = [{
+ 'url': 'http://www.democracynow.org/shows/2015/7/3',
+ 'md5': '3757c182d3d84da68f5c8f506c18c196',
+ 'info_dict': {
+ 'id': '2015-0703-001',
+ 'ext': 'mp4',
+ 'title': 'Daily Show for July 03, 2015',
+ 'description': 'md5:80eb927244d6749900de6072c7cc2c86',
+ },
+ }, {
+ 'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
+ 'info_dict': {
+ 'id': '2015-0703-001',
+ 'ext': 'mp4',
+ 'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag',
+ 'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ json_data = self._parse_json(self._search_regex(
+ r'<script[^>]+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'),
+ display_id)
+
+ title = json_data['title']
+ formats = []
+
+ video_id = None
+
+ for key in ('file', 'audio', 'video', 'high_res_video'):
+ media_url = json_data.get(key, '')
+ if not media_url:
+ continue
+ media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url))
+ video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn')
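+ # e.g. a media_url ending in '/dn2015-0703-001.mp4' yields video_id
+ # '2015-0703-001' (cf. the first test above)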
+ formats.append({
+ 'url': media_url,
+ 'vcodec': 'none' if key == 'audio' else None,
+ })
+
+ self._sort_formats(formats)
+
+ default_lang = 'en'
+ subtitles = {}
+
+ def add_subtitle_item(lang, info_dict):
+ if lang not in subtitles:
+ subtitles[lang] = []
+ subtitles[lang].append(info_dict)
+
+ # chapter_file entries are not subtitles
+ if 'caption_file' in json_data:
+ add_subtitle_item(default_lang, {
+ 'url': compat_urlparse.urljoin(url, json_data['caption_file']),
+ })
+
+ for subtitle_item in json_data.get('captions', []):
+ lang = subtitle_item.get('language', '').lower() or default_lang
+ add_subtitle_item(lang, {
+ 'url': compat_urlparse.urljoin(url, subtitle_item['url']),
+ })
+
+ description = self._og_search_description(webpage, default=None)
+
+ return {
+ 'id': video_id or display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': json_data.get('image'),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/dfb.py b/hypervideo_dl/extractor/dfb.py
new file mode 100644
index 0000000..a4d0448
--- /dev/null
+++ b/hypervideo_dl/extractor/dfb.py
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class DFBIE(InfoExtractor):
+ IE_NAME = 'tv.dfb.de'
+ _VALID_URL = r'https?://tv\.dfb\.de/video/(?P<display_id>[^/]+)/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
+ 'md5': 'ac0f98a52a330f700b4b3034ad240649',
+ 'info_dict': {
+ 'id': '11633',
+ 'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
+ 'ext': 'mp4',
+ 'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
+ 'upload_date': '20150714',
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id, video_id = re.match(self._VALID_URL, url).groups()
+
+ player_info = self._download_xml(
+ 'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
+ display_id)
+ video_info = player_info.find('video')
+ stream_access_url = self._proto_relative_url(video_info.find('url').text.strip())
+
+ formats = []
+ # see http://tv.dfb.de/player/js/ajax.js for the method to extract m3u8 formats
+ for sa_url in (stream_access_url, stream_access_url + '&area=&format=iphone'):
+ stream_access_info = self._download_xml(sa_url, display_id)
+ token_el = stream_access_info.find('token')
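+            # the Akamai auth token must be appended as the 'hdnea' query parameter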
+            manifest_url = token_el.attrib['url'] + '?hdnea=' + token_el.attrib['auth']
+ if '.f4m' in manifest_url:
+ formats.extend(self._extract_f4m_formats(
+ manifest_url + '&hdcore=3.2.0',
+ display_id, f4m_id='hds', fatal=False))
+ else:
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, display_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': video_info.find('title').text,
+ 'thumbnail': 'http://tv.dfb.de/images/%s_640x360.jpg' % video_id,
+ 'upload_date': unified_strdate(video_info.find('time_date').text),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/dhm.py b/hypervideo_dl/extractor/dhm.py
new file mode 100644
index 0000000..aee72a6
--- /dev/null
+++ b/hypervideo_dl/extractor/dhm.py
@@ -0,0 +1,61 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class DHMIE(InfoExtractor):
+ IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
+ _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
+ 'md5': '11c475f670209bf6acca0b2b7ef51827',
+ 'info_dict': {
+ 'id': 'the-marshallplan-at-work-in-west-germany',
+ 'ext': 'flv',
+ 'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
+ 'description': 'md5:1fabd480c153f97b07add61c44407c82',
+ 'duration': 660,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/',
+ 'md5': '09890226332476a3e3f6f2cb74734aa5',
+ 'info_dict': {
+ 'id': 'rolle-1',
+ 'ext': 'flv',
+ 'title': 'ROLLE 1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ playlist_url = self._search_regex(
+ r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
+
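+        # the player config points at an XSPF playlist; the page-level metadata
+        # scraped below is merged into its first entry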
+ entries = self._extract_xspf_playlist(playlist_url, playlist_id)
+
+ title = self._search_regex(
+ [r'dc:title="([^"]+)"', r'<title> &raquo;([^<]+)</title>'],
+ webpage, 'title').strip()
+ description = self._html_search_regex(
+ r'<p><strong>Description:</strong>(.+?)</p>',
+ webpage, 'description', default=None)
+ duration = parse_duration(self._search_regex(
+ r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
+ webpage, 'duration', default=None))
+
+ entries[0].update({
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ })
+
+ return self.playlist_result(entries, playlist_id)
diff --git a/hypervideo_dl/extractor/digg.py b/hypervideo_dl/extractor/digg.py
new file mode 100644
index 0000000..913c175
--- /dev/null
+++ b/hypervideo_dl/extractor/digg.py
@@ -0,0 +1,57 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class DiggIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?digg\.com/video/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # JWPlatform via provider
+ 'url': 'http://digg.com/video/sci-fi-short-jonah-daniel-kaluuya-get-out',
+ 'info_dict': {
+ 'id': 'LcqvmS0b',
+ 'ext': 'mp4',
+ 'title': "'Get Out' Star Daniel Kaluuya Goes On 'Moby Dick'-Like Journey In Sci-Fi Short 'Jonah'",
+ 'description': 'md5:541bb847648b6ee3d6514bc84b82efda',
+ 'upload_date': '20180109',
+ 'timestamp': 1515530551,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # Youtube via provider
+ 'url': 'http://digg.com/video/dog-boat-seal-play',
+ 'only_matching': True,
+ }, {
+ # vimeo as regular embed
+ 'url': 'http://digg.com/video/dream-girl-short-film',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ info = self._parse_json(
+ self._search_regex(
+ r'(?s)video_info\s*=\s*({.+?});\n', webpage, 'video info',
+ default='{}'), display_id, transform_source=js_to_json,
+ fatal=False)
+
+ video_id = info.get('video_id')
+
+ if video_id:
+ provider = info.get('provider_name')
+ if provider == 'youtube':
+ return self.url_result(
+ video_id, ie='Youtube', video_id=video_id)
+ elif provider == 'jwplayer':
+ return self.url_result(
+ 'jwplatform:%s' % video_id, ie='JWPlatform',
+ video_id=video_id)
+
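+        # anything else (e.g. a plain Vimeo embed) is handed off to the generic extractor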
+ return self.url_result(url, 'Generic')
diff --git a/hypervideo_dl/extractor/digiteka.py b/hypervideo_dl/extractor/digiteka.py
new file mode 100644
index 0000000..3dfde0d
--- /dev/null
+++ b/hypervideo_dl/extractor/digiteka.py
@@ -0,0 +1,113 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class DigitekaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/
+ (?:
+ deliver/
+ (?P<embed_type>
+ generic|
+ musique
+ )
+ (?:/[^/]+)*/
+ (?:
+ src|
+ article
+ )|
+ default/index/video
+ (?P<site_type>
+ generic|
+ music
+ )
+ /id
+ )/(?P<id>[\d+a-z]+)'''
+ _TESTS = [{
+ # news
+ 'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
+ 'md5': '276a0e49de58c7e85d32b057837952a2',
+ 'info_dict': {
+ 'id': 's8uk0r',
+ 'ext': 'mp4',
+ 'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 74,
+ 'upload_date': '20150317',
+ 'timestamp': 1426604939,
+ 'uploader_id': '3fszv',
+ },
+ }, {
+ # music
+ 'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
+ 'md5': '2ea3513813cf230605c7e2ffe7eca61c',
+ 'info_dict': {
+ 'id': 'xvpfp8',
+ 'ext': 'mp4',
+ 'title': 'Two - C\'est La Vie (clip)',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 233,
+ 'upload_date': '20150224',
+ 'timestamp': 1424760500,
+ 'uploader_id': '3rfzk',
+ },
+ }, {
+ 'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)',
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ video_type = mobj.group('embed_type') or mobj.group('site_type')
+ if video_type == 'music':
+ video_type = 'musique'
+
+ deliver_info = self._download_json(
+ 'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type),
+ video_id)
+
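+        # deliveries backed by YouTube expose a yt_id; delegate those directly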
+ yt_id = deliver_info.get('yt_id')
+ if yt_id:
+ return self.url_result(yt_id, 'Youtube')
+
+ jwconf = deliver_info['jwconf']
+
+ formats = []
+ for source in jwconf['playlist'][0]['sources']:
+ formats.append({
+ 'url': source['file'],
+ 'format_id': source.get('label'),
+ })
+
+ self._sort_formats(formats)
+
+ title = deliver_info['title']
+ thumbnail = jwconf.get('image')
+ duration = int_or_none(deliver_info.get('duration'))
+ timestamp = int_or_none(deliver_info.get('release_time'))
+ uploader_id = deliver_info.get('owner_id')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/discovery.py b/hypervideo_dl/extractor/discovery.py
new file mode 100644
index 0000000..e0139cc
--- /dev/null
+++ b/hypervideo_dl/extractor/discovery.py
@@ -0,0 +1,122 @@
+from __future__ import unicode_literals
+
+import random
+import re
+import string
+
+from .discoverygo import DiscoveryGoBaseIE
+from ..compat import (
+    compat_HTTPError,
+    compat_urllib_parse_unquote,
+)
+from ..utils import ExtractorError
+
+
+class DiscoveryIE(DiscoveryGoBaseIE):
+ _VALID_URL = r'''(?x)https?://
+ (?P<site>
+ go\.discovery|
+ www\.
+ (?:
+ investigationdiscovery|
+ discoverylife|
+ animalplanet|
+ ahctv|
+ destinationamerica|
+ sciencechannel|
+ tlc
+ )|
+ watch\.
+ (?:
+ hgtv|
+ foodnetwork|
+ travelchannel|
+ diynetwork|
+ cookingchanneltv|
+ motortrend
+ )
+ )\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
+ _TESTS = [{
+ 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
+ 'info_dict': {
+ 'id': '5a2f35ce6b66d17a5026e29e',
+ 'ext': 'mp4',
+ 'title': 'Riding with Matthew Perry',
+ 'description': 'md5:a34333153e79bc4526019a5129e7f878',
+ 'duration': 84,
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ }
+ }, {
+ 'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
+ 'only_matching': True,
+ }, {
+ # using `show_slug` is important to get the correct video data
+ 'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
+ 'only_matching': True,
+ }]
+ _GEO_COUNTRIES = ['US']
+ _GEO_BYPASS = False
+ _API_BASE_URL = 'https://api.discovery.com/v1/'
+
+ def _real_extract(self, url):
+ site, show_slug, display_id = re.match(self._VALID_URL, url).groups()
+
+ access_token = None
+ cookies = self._get_cookies(url)
+
+ # prefer Affiliate Auth Token over Anonymous Auth Token
+ auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn')
+ if auth_storage_cookie and auth_storage_cookie.value:
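+            # the cookie value is URL-encoded twice, hence the double unquote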
+ auth_storage = self._parse_json(compat_urllib_parse_unquote(
+ compat_urllib_parse_unquote(auth_storage_cookie.value)),
+ display_id, fatal=False) or {}
+ access_token = auth_storage.get('a') or auth_storage.get('access_token')
+
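+        # otherwise request an anonymous access token with a random nonce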
+ if not access_token:
+ access_token = self._download_json(
+ 'https://%s.com/anonymous' % site, display_id,
+ 'Downloading token JSON metadata', query={
+ 'authRel': 'authorization',
+ 'client_id': '3020a40c2356a645b4b4',
+ 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
+ 'redirectUri': 'https://www.discovery.com/',
+ })['access_token']
+
+ headers = self.geo_verification_headers()
+ headers['Authorization'] = 'Bearer ' + access_token
+
+ try:
+ video = self._download_json(
+ self._API_BASE_URL + 'content/videos',
+ display_id, 'Downloading content JSON metadata',
+ headers=headers, query={
+ 'embed': 'show.name',
+ 'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
+ 'slug': display_id,
+ 'show_slug': show_slug,
+ })[0]
+ video_id = video['id']
+ stream = self._download_json(
+ self._API_BASE_URL + 'streaming/video/' + video_id,
+ display_id, 'Downloading streaming JSON metadata', headers=headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
+ e_description = self._parse_json(
+ e.cause.read().decode(), display_id)['description']
+ if 'resource not available for country' in e_description:
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ if 'Authorized Networks' in e_description:
+ raise ExtractorError(
+                        'This video is only available via a cable service provider subscription,'
+                        ' which is not currently supported. You may want to use --cookies.', expected=True)
+ raise ExtractorError(e_description)
+ raise
+
+ return self._extract_video_info(video, stream, display_id)
diff --git a/hypervideo_dl/extractor/discoverygo.py b/hypervideo_dl/extractor/discoverygo.py
new file mode 100644
index 0000000..9e7b14a
--- /dev/null
+++ b/hypervideo_dl/extractor/discoverygo.py
@@ -0,0 +1,177 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+ remove_end,
+ unescapeHTML,
+ url_or_none,
+)
+
+
+class DiscoveryGoBaseIE(InfoExtractor):
+ _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?:
+ discovery|
+ investigationdiscovery|
+ discoverylife|
+ animalplanet|
+ ahctv|
+ destinationamerica|
+ sciencechannel|
+ tlc|
+ velocitychannel
+ )go\.com/%s(?P<id>[^/?#&]+)'''
+
+ def _extract_video_info(self, video, stream, display_id):
+ title = video['name']
+
+ if not stream:
+ if video.get('authenticated') is True:
+ raise ExtractorError(
+                    'This video is only available via a cable service provider subscription,'
+                    ' which is not currently supported. You may want to use --cookies.', expected=True)
+ else:
+ raise ExtractorError('Unable to find stream')
+ STREAM_URL_SUFFIX = 'streamUrl'
+ formats = []
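+        # the stream dict is expected to expose 'streamUrl' (HLS) and
+        # 'hdsStreamurl' (HDS) keys; capitalize() builds the latter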
+ for stream_kind in ('', 'hds'):
+ suffix = STREAM_URL_SUFFIX.capitalize() if stream_kind else STREAM_URL_SUFFIX
+ stream_url = stream.get('%s%s' % (stream_kind, suffix))
+ if not stream_url:
+ continue
+ if stream_kind == '':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif stream_kind == 'hds':
+ formats.extend(self._extract_f4m_formats(
+ stream_url, display_id, f4m_id=stream_kind, fatal=False))
+ self._sort_formats(formats)
+
+ video_id = video.get('id') or display_id
+ description = video.get('description', {}).get('detailed')
+ duration = int_or_none(video.get('duration'))
+
+ series = video.get('show', {}).get('name')
+ season_number = int_or_none(video.get('season', {}).get('number'))
+ episode_number = int_or_none(video.get('episodeNumber'))
+
+ tags = video.get('tags')
+ age_limit = parse_age_limit(video.get('parental', {}).get('rating'))
+
+ subtitles = {}
+ captions = stream.get('captions')
+ if isinstance(captions, list):
+ for caption in captions:
+ subtitle_url = url_or_none(caption.get('fileUrl'))
+ if not subtitle_url or not subtitle_url.startswith('http'):
+ continue
+ lang = caption.get('fileLang', 'en')
+ ext = determine_ext(subtitle_url)
+ subtitles.setdefault(lang, []).append({
+ 'url': subtitle_url,
+ 'ext': 'ttml' if ext == 'xml' else ext,
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'tags': tags,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class DiscoveryGoIE(DiscoveryGoBaseIE):
+ _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
+ _GEO_COUNTRIES = ['US']
+ _TEST = {
+ 'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
+ 'info_dict': {
+ 'id': '58c167d86b66d12f2addeb01',
+ 'ext': 'mp4',
+ 'title': 'Reaper Madness',
+ 'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
+ 'duration': 2519,
+ 'series': 'Bering Sea Gold',
+ 'season_number': 8,
+ 'episode_number': 6,
+ 'age_limit': 14,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ container = extract_attributes(
+ self._search_regex(
+ r'(<div[^>]+class=["\']video-player-container[^>]+>)',
+ webpage, 'video container'))
+
+ video = self._parse_json(
+ container.get('data-video') or container.get('data-json'),
+ display_id)
+
+ stream = video.get('stream')
+
+ return self._extract_video_info(video, stream, display_id)
+
+
+class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
+ _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % ''
+ _TEST = {
+ 'url': 'https://www.discoverygo.com/bering-sea-gold/',
+ 'info_dict': {
+ 'id': 'bering-sea-gold',
+ 'title': 'Bering Sea Gold',
+ 'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e',
+ },
+ 'playlist_mincount': 6,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if DiscoveryGoIE.suitable(url) else super(
+ DiscoveryGoPlaylistIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
+ for mobj in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage):
+ data = self._parse_json(
+ mobj.group('json'), display_id,
+ transform_source=unescapeHTML, fatal=False)
+ if not isinstance(data, dict) or data.get('type') != 'episode':
+ continue
+ episode_url = data.get('socialUrl')
+ if not episode_url:
+ continue
+ entries.append(self.url_result(
+ episode_url, ie=DiscoveryGoIE.ie_key(),
+ video_id=data.get('id')))
+
+ return self.playlist_result(
+ entries, display_id,
+ remove_end(self._og_search_title(
+ webpage, fatal=False), ' | Discovery GO'),
+ self._og_search_description(webpage))
diff --git a/hypervideo_dl/extractor/discoverynetworks.py b/hypervideo_dl/extractor/discoverynetworks.py
new file mode 100644
index 0000000..c512b95
--- /dev/null
+++ b/hypervideo_dl/extractor/discoverynetworks.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .dplay import DPlayIE
+
+
+class DiscoveryNetworksDeIE(DPlayIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
+ 'info_dict': {
+ 'id': '78867',
+ 'ext': 'mp4',
+ 'title': 'Die Welt da draußen',
+ 'description': 'md5:61033c12b73286e409d99a41742ef608',
+ 'timestamp': 1554069600,
+ 'upload_date': '20190331',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, programme, alternate_id = re.match(self._VALID_URL, url).groups()
+ country = 'GB' if domain == 'dplay.co.uk' else 'DE'
+ realm = 'questuk' if country == 'GB' else domain.replace('.', '')
+ return self._get_disco_api_info(
+ url, '%s/%s' % (programme, alternate_id),
+ 'sonic-eu1-prod.disco-api.com', realm, country)
diff --git a/hypervideo_dl/extractor/discoveryvr.py b/hypervideo_dl/extractor/discoveryvr.py
new file mode 100644
index 0000000..cb63c26
--- /dev/null
+++ b/hypervideo_dl/extractor/discoveryvr.py
@@ -0,0 +1,61 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class DiscoveryVRIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction',
+ 'md5': '32b1929798c464a54356378b7912eca4',
+ 'info_dict': {
+ 'id': 'discovery-vr-an-introduction',
+ 'ext': 'mp4',
+ 'title': 'Discovery VR - An Introduction',
+ 'description': 'md5:80d418a10efb8899d9403e61d8790f06',
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
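+        # the player state is embedded as an escaped JS string literal;
+        # unicode_escape undoes the embedded backslash escaping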
+ bootstrap_data = self._search_regex(
+ r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";',
+ webpage, 'bootstrap data')
+ bootstrap_data = self._parse_json(
+ bootstrap_data.encode('utf-8').decode('unicode_escape'),
+ display_id)
+ videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos']
+ video_data = next(video for video in videos if video.get('slug') == display_id)
+
+ series = video_data.get('showTitle')
+ title = episode = video_data.get('title') or series
+ if series and series != title:
+ title = '%s - %s' % (series, title)
+
+ formats = []
+ for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')):
+ f_url = video_data.get(f)
+ if not f_url:
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': f_url,
+ })
+
+ return {
+ 'id': display_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnail'),
+ 'duration': parse_duration(video_data.get('runTime')),
+ 'formats': formats,
+ 'episode': episode,
+ 'series': series,
+ }
diff --git a/hypervideo_dl/extractor/disney.py b/hypervideo_dl/extractor/disney.py
new file mode 100644
index 0000000..0eee82f
--- /dev/null
+++ b/hypervideo_dl/extractor/disney.py
@@ -0,0 +1,171 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    determine_ext,
+    ExtractorError,
+    int_or_none,
+    unified_strdate,
+    update_url_query,
+)
+
+
+class DisneyIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr|channel\.de)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
+ _TESTS = [{
+ # Disney.EmbedVideo
+ 'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
+ 'info_dict': {
+ 'id': '545ed1857afee5a0ec239977',
+ 'ext': 'mp4',
+ 'title': 'Moana - Trailer',
+ 'description': 'A fun adventure for the entire Family! Bring home Moana on Digital HD Feb 21 & Blu-ray March 7',
+ 'upload_date': '20170112',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # Grill.burger
+ 'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette',
+ 'info_dict': {
+ 'id': '5454e9f4e9804a552e3524c8',
+ 'ext': 'mp4',
+ 'title': '"Intro" Featurette: Rogue One: A Star Wars Story',
+ 'upload_date': '20170104',
+ 'description': 'Go behind-the-scenes of Rogue One: A Star Wars Story in this featurette with Director Gareth Edwards and the cast of the film.',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://video.en.disneyme.com/watch/future-worm/robo-carp-2001-544b66002aa7353cdd3f5114',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://video.disneyturkiye.com.tr/izle/7c-7-cuceler/kimin-sesi-zaten-5456f3d015f6b36c8afdd0e2',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://disneyjunior.disney.com/embed/546a4798ddba3d1612e4005d',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://spiderman.marvelkids.com/embed/522900d2ced3c565e4cc0677',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://spiderman.marvelkids.com/videos/contest-of-champions-part-four-clip-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://disneychannel.de/sehen/soy-luna-folge-118-5518518987ba27f3cc729268',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
+ if not video_id:
+ webpage = self._download_webpage(url, display_id)
+ grill = re.sub(r'"\s*\+\s*"', '', self._search_regex(
+ r'Grill\.burger\s*=\s*({.+})\s*:',
+ webpage, 'grill data'))
+ page_data = next(s for s in self._parse_json(grill, display_id)['stack'] if s.get('type') == 'video')
+ video_data = page_data['data'][0]
+ else:
+ webpage = self._download_webpage(
+ 'http://%s/embed/%s' % (domain, video_id), video_id)
+ page_data = self._parse_json(self._search_regex(
+ r'Disney\.EmbedVideo\s*=\s*({.+});',
+ webpage, 'embed data'), video_id)
+ video_data = page_data['video']
+
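+        # Vevo-hosted music videos are delegated to the Vevo extractor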
+ for external in video_data.get('externals', []):
+ if external.get('source') == 'vevo':
+ return self.url_result('vevo:' + external['data_id'], 'Vevo')
+
+ video_id = video_data['id']
+ title = video_data['title']
+
+ formats = []
+ for flavor in video_data.get('flavors', []):
+ flavor_format = flavor.get('format')
+ flavor_url = flavor.get('url')
+ if not flavor_url or not re.match(r'https?://', flavor_url) or flavor_format == 'mp4_access':
+ continue
+ tbr = int_or_none(flavor.get('bitrate'))
+ if tbr == 99999:
+ # wrong ks(Kaltura Signature) causes 404 Error
+ flavor_url = update_url_query(flavor_url, {'ks': ''})
+ m3u8_formats = self._extract_m3u8_formats(
+ flavor_url, video_id, 'mp4',
+ m3u8_id=flavor_format, fatal=False)
+ for f in m3u8_formats:
+ # Apple FairPlay
+ if '/fpshls/' in f['url']:
+ continue
+ formats.append(f)
+ continue
+ format_id = []
+ if flavor_format:
+ format_id.append(flavor_format)
+ if tbr:
+ format_id.append(compat_str(tbr))
+ ext = determine_ext(flavor_url)
+ if flavor_format == 'applehttp' or ext == 'm3u8':
+ ext = 'mp4'
+ width = int_or_none(flavor.get('width'))
+ height = int_or_none(flavor.get('height'))
+ formats.append({
+ 'format_id': '-'.join(format_id),
+ 'url': flavor_url,
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'ext': ext,
+ 'vcodec': 'none' if (width == 0 and height == 0) else None,
+ })
+ if not formats and video_data.get('expired'):
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']),
+ expected=True)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for caption in video_data.get('captions', []):
+ caption_url = caption.get('url')
+ caption_format = caption.get('format')
+            if not caption_url or not caption_format or caption_format.startswith('unknown'):
+ continue
+ subtitles.setdefault(caption.get('language', 'en'), []).append({
+ 'url': caption_url,
+ 'ext': {
+ 'webvtt': 'vtt',
+ }.get(caption_format, caption_format),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description') or video_data.get('short_desc'),
+ 'thumbnail': video_data.get('thumb') or video_data.get('thumb_secure'),
+ 'duration': int_or_none(video_data.get('duration_sec')),
+ 'upload_date': unified_strdate(video_data.get('publish_date')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/dispeak.py b/hypervideo_dl/extractor/dispeak.py
new file mode 100644
index 0000000..276fd4b
--- /dev/null
+++ b/hypervideo_dl/extractor/dispeak.py
@@ -0,0 +1,132 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ remove_end,
+ xpath_element,
+ xpath_text,
+)
+
+
+class DigitallySpeakingIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
+
+ _TESTS = [{
+ # From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
+ 'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml',
+ 'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
+ 'info_dict': {
+ 'id': '840376_BQRC',
+ 'ext': 'mp4',
+ 'title': 'Tenacious Design and The Interface of \'Destiny\'',
+ },
+ }, {
+ # From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
+ 'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
+ 'only_matching': True,
+ }, {
+ # From http://www.gdcvault.com/play/1013700/Advanced-Material
+ 'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
+ 'only_matching': True,
+ }, {
+ # From https://gdcvault.com/play/1016624, empty speakerVideo
+ 'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml',
+ 'info_dict': {
+ 'id': '201210-822101_1349794556671DDDD',
+ 'ext': 'flv',
+ 'title': 'Pre-launch - Preparing to Take the Plunge',
+ },
+ }, {
+ # From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo
+ 'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml',
+ 'only_matching': True,
+ }]
+
+ def _parse_mp4(self, metadata):
+ video_formats = []
+ video_root = None
+
+ mp4_video = xpath_text(metadata, './mp4video', default=None)
+ if mp4_video is not None:
+ mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video)
+ video_root = mobj.group('root')
+ if video_root is None:
+ http_host = xpath_text(metadata, 'httpHost', default=None)
+ if http_host:
+ video_root = 'http://%s/' % http_host
+ if video_root is None:
+ # Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js
+ # Works for GPUTechConf, too
+ video_root = 'http://s3-2u.digitallyspeaking.com/'
+
+ formats = metadata.findall('./MBRVideos/MBRVideo')
+ if not formats:
+ return None
+ for a_format in formats:
+ stream_name = xpath_text(a_format, 'streamName', fatal=True)
+            video_path = re.match(r'mp4:(?P<path>.*)', stream_name).group('path')
+ url = video_root + video_path
+ bitrate = xpath_text(a_format, 'bitrate')
+ tbr = int_or_none(bitrate)
+ vbr = int_or_none(self._search_regex(
+ r'-(\d+)\.mp4', video_path, 'vbr', default=None))
+ abr = tbr - vbr if tbr and vbr else None
+ video_formats.append({
+ 'format_id': bitrate,
+ 'url': url,
+ 'tbr': tbr,
+ 'vbr': vbr,
+ 'abr': abr,
+ })
+ return video_formats
+
+ def _parse_flv(self, metadata):
+ formats = []
+ akamai_url = xpath_text(metadata, './akamaiHost', fatal=True)
+ audios = metadata.findall('./audios/audio')
+ for audio in audios:
+ formats.append({
+ 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+ 'play_path': remove_end(audio.get('url'), '.flv'),
+ 'ext': 'flv',
+ 'vcodec': 'none',
+ 'format_id': audio.get('code'),
+ })
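+        # slide and speaker videos are deprioritized via negative preference values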
+ for video_key, format_id, preference in (
+ ('slide', 'slides', -2), ('speaker', 'speaker', -1)):
+ video_path = xpath_text(metadata, './%sVideo' % video_key)
+ if not video_path:
+ continue
+ formats.append({
+ 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+ 'play_path': remove_end(video_path, '.flv'),
+ 'ext': 'flv',
+ 'format_note': '%s video' % video_key,
+ 'quality': preference,
+ 'preference': preference,
+ 'format_id': format_id,
+ })
+ return formats
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ xml_description = self._download_xml(url, video_id)
+ metadata = xpath_element(xml_description, 'metadata')
+
+ video_formats = self._parse_mp4(metadata)
+ if video_formats is None:
+ video_formats = self._parse_flv(metadata)
+
+ return {
+ 'id': video_id,
+ 'formats': video_formats,
+ 'title': xpath_text(metadata, 'title', fatal=True),
+ 'duration': parse_duration(xpath_text(metadata, 'endTime')),
+ 'creator': xpath_text(metadata, 'speaker'),
+ }
diff --git a/hypervideo_dl/extractor/dlive.py b/hypervideo_dl/extractor/dlive.py
new file mode 100644
index 0000000..d95c67a
--- /dev/null
+++ b/hypervideo_dl/extractor/dlive.py
@@ -0,0 +1,98 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class DLiveVODIE(InfoExtractor):
+ IE_NAME = 'dlive:vod'
+ _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P<uploader_id>.+?)\+(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://dlive.tv/p/pdp+3mTzOl4WR',
+ 'info_dict': {
+ 'id': '3mTzOl4WR',
+ 'ext': 'mp4',
+ 'title': 'Minecraft with james charles epic',
+ 'upload_date': '20190701',
+ 'timestamp': 1562011015,
+ 'uploader_id': 'pdp',
+ }
+ }, {
+ 'url': 'https://dlive.tv/p/pdpreplay+D-RD-xSZg',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ uploader_id, vod_id = re.match(self._VALID_URL, url).groups()
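+        # the GraphQL API addresses past broadcasts by a '<uploader>+<vod id>' permlink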
+ broadcast = self._download_json(
+ 'https://graphigo.prd.dlive.tv/', vod_id,
+ data=json.dumps({'query': '''query {
+ pastBroadcast(permlink:"%s+%s") {
+ content
+ createdAt
+ length
+ playbackUrl
+ title
+ thumbnailUrl
+ viewCount
+ }
+}''' % (uploader_id, vod_id)}).encode())['data']['pastBroadcast']
+ title = broadcast['title']
+ formats = self._extract_m3u8_formats(
+ broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native')
+ self._sort_formats(formats)
+ return {
+ 'id': vod_id,
+ 'title': title,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ 'description': broadcast.get('content'),
+ 'thumbnail': broadcast.get('thumbnailUrl'),
+ 'timestamp': int_or_none(broadcast.get('createdAt'), 1000),
+ 'view_count': int_or_none(broadcast.get('viewCount')),
+ }
+
+
+class DLiveStreamIE(InfoExtractor):
+ IE_NAME = 'dlive:stream'
+ _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?!p/)(?P<id>[\w.-]+)'
+
+ def _real_extract(self, url):
+ display_name = self._match_id(url)
+ user = self._download_json(
+ 'https://graphigo.prd.dlive.tv/', display_name,
+ data=json.dumps({'query': '''query {
+ userByDisplayName(displayname:"%s") {
+ livestream {
+ content
+ createdAt
+ title
+ thumbnailUrl
+ watchingCount
+ }
+ username
+ }
+}''' % display_name}).encode())['data']['userByDisplayName']
+ livestream = user['livestream']
+ title = livestream['title']
+ username = user['username']
+ formats = self._extract_m3u8_formats(
+ 'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username,
+ display_name, 'mp4')
+ self._sort_formats(formats)
+ return {
+ 'id': display_name,
+ 'title': self._live_title(title),
+ 'uploader': display_name,
+ 'uploader_id': username,
+ 'formats': formats,
+ 'description': livestream.get('content'),
+ 'thumbnail': livestream.get('thumbnailUrl'),
+ 'is_live': True,
+ 'timestamp': int_or_none(livestream.get('createdAt'), 1000),
+ 'view_count': int_or_none(livestream.get('watchingCount')),
+ }
diff --git a/hypervideo_dl/extractor/dotsub.py b/hypervideo_dl/extractor/dotsub.py
new file mode 100644
index 0000000..148605c
--- /dev/null
+++ b/hypervideo_dl/extractor/dotsub.py
@@ -0,0 +1,83 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+)
+
+
+class DotsubIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09',
+ 'md5': '21c7ff600f545358134fea762a6d42b6',
+ 'info_dict': {
+ 'id': '9c63db2a-fa95-4838-8e6e-13deafe47f09',
+ 'ext': 'flv',
+ 'title': 'MOTIVATION - "It\'s Possible" Best Inspirational Video Ever',
+ 'description': 'md5:41af1e273edbbdfe4e216a78b9d34ac6',
+ 'thumbnail': 're:^https?://dotsub.com/media/9c63db2a-fa95-4838-8e6e-13deafe47f09/p',
+ 'duration': 198,
+ 'uploader': 'liuxt',
+ 'timestamp': 1385778501.104,
+ 'upload_date': '20131130',
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://dotsub.com/view/747bcf58-bd59-45b7-8c8c-ac312d084ee6',
+ 'md5': '2bb4a83896434d5c26be868c609429a3',
+ 'info_dict': {
+ 'id': '168006778',
+ 'ext': 'mp4',
+ 'title': 'Apartments and flats in Raipur the white symphony',
+ 'description': 'md5:784d0639e6b7d1bc29530878508e38fe',
+ 'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p',
+ 'duration': 290,
+ 'timestamp': 1476767794.2809999,
+ 'upload_date': '20161018',
+ 'uploader': 'parthivi001',
+ 'uploader_id': 'user52596202',
+ 'view_count': int,
+ },
+ 'add_ie': ['Vimeo'],
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = self._download_json(
+ 'https://dotsub.com/api/media/%s/metadata' % video_id, video_id)
+ video_url = info.get('mediaURI')
+
+ if not video_url:
+ webpage = self._download_webpage(url, video_id)
+ video_url = self._search_regex(
+ [r'<source[^>]+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'],
+ webpage, 'video url', default=None)
+ info_dict = {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'flv',
+ }
+
+ if not video_url:
+ setup_data = self._parse_json(self._html_search_regex(
+ r'(?s)data-setup=([\'"])(?P<content>(?!\1).+?)\1',
+ webpage, 'setup data', group='content'), video_id)
+ info_dict = {
+ '_type': 'url_transparent',
+ 'url': setup_data['src'],
+ }
+
+ info_dict.update({
+ 'title': info['title'],
+ 'description': info.get('description'),
+ 'thumbnail': info.get('screenshotURI'),
+ 'duration': int_or_none(info.get('duration'), 1000),
+ 'uploader': info.get('user'),
+ 'timestamp': float_or_none(info.get('dateCreated'), 1000),
+ 'view_count': int_or_none(info.get('numberOfViews')),
+ })
+
+ return info_dict
diff --git a/hypervideo_dl/extractor/douyutv.py b/hypervideo_dl/extractor/douyutv.py
new file mode 100644
index 0000000..9757f44
--- /dev/null
+++ b/hypervideo_dl/extractor/douyutv.py
@@ -0,0 +1,203 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import time
+import hashlib
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ unescapeHTML,
+ unified_strdate,
+ urljoin,
+)
+
+
+class DouyuTVIE(InfoExtractor):
+ IE_DESC = '斗鱼'
+ _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?:[^/]+/)*(?P<id>[A-Za-z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.douyutv.com/iseven',
+ 'info_dict': {
+ 'id': '17732',
+ 'display_id': 'iseven',
+ 'ext': 'flv',
+ 'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': r're:.*m7show@163\.com.*',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': '7师傅',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.douyutv.com/85982',
+ 'info_dict': {
+ 'id': '85982',
+ 'display_id': '85982',
+ 'ext': 'flv',
+ 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'douyu小漠',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Room not found',
+ }, {
+ 'url': 'http://www.douyutv.com/17732',
+ 'info_dict': {
+ 'id': '17732',
+ 'display_id': '17732',
+ 'ext': 'flv',
+ 'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': r're:.*m7show@163\.com.*',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': '7师傅',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.douyu.com/xiaocang',
+ 'only_matching': True,
+ }, {
+ # \"room_id\"
+ 'url': 'http://www.douyu.com/t/lpl',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ if video_id.isdigit():
+ room_id = video_id
+ else:
+ page = self._download_webpage(url, video_id)
+ room_id = self._html_search_regex(
+ r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
+
+ # Grab metadata from mobile API
+ room = self._download_json(
+ 'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
+ note='Downloading room info')['data']
+
+ # 1 = live, 2 = offline
+ if room.get('show_status') == '2':
+ raise ExtractorError('Live stream is offline', expected=True)
+
+ # Grab the URL from PC client API
+ # The m3u8 url from mobile API requires re-authentication every 5 minutes
+ tt = int(time.time())
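+        # the 'auth' header is the MD5 of the request path and query string
+        # with a hard-coded client secret appended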
+ signContent = 'lapi/live/thirdPart/getPlay/%s?aid=pcclient&rate=0&time=%d9TUk5fjjUjg9qIMH3sdnh' % (room_id, tt)
+ sign = hashlib.md5(signContent.encode('ascii')).hexdigest()
+ video_url = self._download_json(
+ 'http://coapi.douyucdn.cn/lapi/live/thirdPart/getPlay/' + room_id,
+ video_id, note='Downloading video URL info',
+ query={'rate': 0}, headers={
+ 'auth': sign,
+ 'time': str(tt),
+ 'aid': 'pcclient'
+ })['data']['live_url']
+
+ title = self._live_title(unescapeHTML(room['room_name']))
+ description = room.get('show_details')
+ thumbnail = room.get('room_src')
+ uploader = room.get('nickname')
+
+ return {
+ 'id': room_id,
+ 'display_id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'is_live': True,
+ }
+
+
+class DouyuShowIE(InfoExtractor):
+ _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
+
+ _TESTS = [{
+ 'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
+ 'md5': '0c2cfd068ee2afe657801269b2d86214',
+ 'info_dict': {
+ 'id': 'rjNBdvnVXNzvE2yw',
+ 'ext': 'mp4',
+ 'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场',
+ 'duration': 7150.08,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': '陈一发儿',
+ 'uploader_id': 'XrZwYelr5wbK',
+ 'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
+ 'upload_date': '20170402',
+ },
+ }, {
+ 'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ url = url.replace('vmobile.', 'v.')
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ room_info = self._parse_json(self._search_regex(
+ r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
+
+ video_info = None
+
+ for trial in range(5):
+            # Douyu sometimes rejects the request; retry up to 5 times
+ try:
+ video_info = self._download_json(
+ 'https://vmobile.douyu.com/video/getInfo', video_id,
+ query={'vid': video_id},
+ headers={
+ 'Referer': url,
+ 'x-requested-with': 'XMLHttpRequest',
+ })
+ break
+ except ExtractorError:
+ self._sleep(1, video_id)
+
+ if not video_info:
+ raise ExtractorError('Can\'t fetch video info')
+
+ formats = self._extract_m3u8_formats(
+ video_info['data']['video_url'], video_id,
+ entry_protocol='m3u8_native', ext='mp4')
+
+ upload_date = unified_strdate(self._html_search_regex(
+ r'<em>上传时间:</em><span>([^<]+)</span>', webpage,
+ 'upload date', fatal=False))
+
+ uploader = uploader_id = uploader_url = None
+ mobj = re.search(
+ r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
+ webpage)
+ if mobj:
+ uploader_id, uploader = mobj.groups()
+ uploader_url = urljoin(url, '/author/' + uploader_id)
+
+ return {
+ 'id': video_id,
+ 'title': room_info['name'],
+ 'formats': formats,
+ 'duration': room_info.get('duration'),
+ 'thumbnail': room_info.get('pic'),
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'uploader_url': uploader_url,
+ }
diff --git a/hypervideo_dl/extractor/dplay.py b/hypervideo_dl/extractor/dplay.py
new file mode 100644
index 0000000..bbb1990
--- /dev/null
+++ b/hypervideo_dl/extractor/dplay.py
@@ -0,0 +1,372 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ strip_or_none,
+ unified_timestamp,
+)
+
+
+class DPlayIE(InfoExtractor):
+ _PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)'
+ _VALID_URL = r'''(?x)https?://
+ (?P<domain>
+ (?:www\.)?(?P<host>d
+ (?:
+ play\.(?P<country>dk|fi|jp|se|no)|
+ iscoveryplus\.(?P<plus_country>dk|es|fi|it|se|no)
+ )
+ )|
+ (?P<subdomain_country>es|it)\.dplay\.com
+ )/[^/]+''' + _PATH_REGEX
+
+ _TESTS = [{
+ # non geo restricted, via secure api, unsigned download hls URL
+ 'url': 'https://www.dplay.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
+ 'info_dict': {
+ 'id': '13628',
+ 'display_id': 'nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
+ 'ext': 'mp4',
+ 'title': 'Svensken lär sig njuta av livet',
+ 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
+ 'duration': 2649.856,
+ 'timestamp': 1365453720,
+ 'upload_date': '20130408',
+ 'creator': 'Kanal 5',
+ 'series': 'Nugammalt - 77 händelser som format Sverige',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ # geo restricted, via secure api, unsigned download hls URL
+ 'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
+ 'info_dict': {
+ 'id': '104465',
+ 'display_id': 'ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
+ 'ext': 'mp4',
+ 'title': 'Ted Bundy: Mind Of A Monster',
+ 'description': 'md5:8b780f6f18de4dae631668b8a9637995',
+ 'duration': 5290.027,
+ 'timestamp': 1570694400,
+ 'upload_date': '20191010',
+ 'creator': 'ID - Investigation Discovery',
+ 'series': 'Ted Bundy: Mind Of A Monster',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ # disco-api
+ 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
+ 'info_dict': {
+ 'id': '40206',
+ 'display_id': 'i-kongens-klr/sesong-1-episode-7',
+ 'ext': 'mp4',
+ 'title': 'Episode 7',
+ 'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
+ 'duration': 2611.16,
+ 'timestamp': 1516726800,
+ 'upload_date': '20180123',
+ 'series': 'I kongens klær',
+ 'season_number': 1,
+ 'episode_number': 7,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
+ 'md5': '2b808ffb00fc47b884a172ca5d13053c',
+ 'info_dict': {
+ 'id': '6918',
+ 'display_id': 'biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
+ 'ext': 'mp4',
+ 'title': 'Luigi Di Maio: la psicosi di Stanislawskij',
+ 'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'upload_date': '20160524',
+ 'timestamp': 1464076800,
+ 'series': 'Biografie imbarazzanti',
+ 'season_number': 1,
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ },
+ }, {
+ 'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/',
+ 'info_dict': {
+ 'id': '21652',
+ 'display_id': 'la-fiebre-del-oro/temporada-8-episodio-1',
+ 'ext': 'mp4',
+ 'title': 'Episodio 1',
+ 'description': 'md5:b9dcff2071086e003737485210675f69',
+ 'thumbnail': r're:^https?://.*\.png',
+ 'upload_date': '20180709',
+ 'timestamp': 1531173540,
+ 'series': 'La fiebre del oro',
+ 'season_number': 8,
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dplay.jp/video/gold-rush/24086',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.no/videoer/i-kongens-klr/sesong-1-episode-7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.it/videos/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.es/videos/la-fiebre-del-oro/temporada-8-episodio-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
+ 'only_matching': True,
+ }]
+
+ def _process_errors(self, e, geo_countries):
+ info = self._parse_json(e.cause.read().decode('utf-8'), None)
+ error = info['errors'][0]
+ error_code = error.get('code')
+ if error_code == 'access.denied.geoblocked':
+ self.raise_geo_restricted(countries=geo_countries)
+ elif error_code in ('access.denied.missingpackage', 'invalid.token'):
+ raise ExtractorError(
+ 'This video is only available for registered users. You may want to use --cookies.', expected=True)
+ raise ExtractorError(info['errors'][0]['detail'], expected=True)
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers['Authorization'] = 'Bearer ' + self._download_json(
+ disco_base + 'token', display_id, 'Downloading token',
+ query={
+ 'realm': realm,
+ })['data']['attributes']['token']
+
+ def _download_video_playback_info(self, disco_base, video_id, headers):
+ streaming = self._download_json(
+ disco_base + 'playback/videoPlaybackInfo/' + video_id,
+ video_id, headers=headers)['data']['attributes']['streaming']
+ streaming_list = []
+ for format_id, format_dict in streaming.items():
+ streaming_list.append({
+ 'type': format_id,
+ 'url': format_dict.get('url'),
+ })
+ return streaming_list
+
+ def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
+ geo_countries = [country.upper()]
+ self._initialize_geo_bypass({
+ 'countries': geo_countries,
+ })
+ disco_base = 'https://%s/' % disco_host
+ headers = {
+ 'Referer': url,
+ }
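+        # every API call requires a bearer token scoped to the site's realm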
+ self._update_disco_api_headers(headers, disco_base, display_id, realm)
+ try:
+ video = self._download_json(
+ disco_base + 'content/videos/' + display_id, display_id,
+ headers=headers, query={
+ 'fields[channel]': 'name',
+ 'fields[image]': 'height,src,width',
+ 'fields[show]': 'name',
+ 'fields[tag]': 'name',
+ 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
+ 'include': 'images,primaryChannel,show,tags'
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ self._process_errors(e, geo_countries)
+ raise
+ video_id = video['data']['id']
+ info = video['data']['attributes']
+ title = info['name'].strip()
+ formats = []
+ try:
+ streaming = self._download_video_playback_info(
+ disco_base, video_id, headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ self._process_errors(e, geo_countries)
+ raise
+ for format_dict in streaming:
+ if not isinstance(format_dict, dict):
+ continue
+ format_url = format_dict.get('url')
+ if not format_url:
+ continue
+ format_id = format_dict.get('type')
+ ext = determine_ext(format_url)
+ if format_id == 'dash' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, display_id, mpd_id='dash', fatal=False))
+ elif format_id == 'hls' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ self._sort_formats(formats)
+
+ creator = series = None
+ tags = []
+ thumbnails = []
+ included = video.get('included') or []
+ if isinstance(included, list):
+ for e in included:
+ attributes = e.get('attributes')
+ if not attributes:
+ continue
+ e_type = e.get('type')
+ if e_type == 'channel':
+ creator = attributes.get('name')
+ elif e_type == 'image':
+ src = attributes.get('src')
+ if src:
+ thumbnails.append({
+ 'url': src,
+ 'width': int_or_none(attributes.get('width')),
+ 'height': int_or_none(attributes.get('height')),
+ })
+                elif e_type == 'show':
+ series = attributes.get('name')
+ elif e_type == 'tag':
+ name = attributes.get('name')
+ if name:
+ tags.append(name)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': strip_or_none(info.get('description')),
+ 'duration': float_or_none(info.get('videoDuration'), 1000),
+ 'timestamp': unified_timestamp(info.get('publishStart')),
+ 'series': series,
+ 'season_number': int_or_none(info.get('seasonNumber')),
+ 'episode_number': int_or_none(info.get('episodeNumber')),
+ 'creator': creator,
+ 'tags': tags,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('id')
+        domain = re.sub(r'^www\.', '', mobj.group('domain'))
+ country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
+ host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
+ return self._get_disco_api_info(
+ url, display_id, host, 'dplay' + country, country)
+
+
+class DiscoveryPlusIE(DPlayIE):
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
+ 'info_dict': {
+ 'id': '1140794',
+ 'display_id': 'property-brothers-forever-home/food-and-family',
+ 'ext': 'mp4',
+ 'title': 'Food and Family',
+ 'description': 'The brothers help a Richmond family expand their single-level home.',
+ 'duration': 2583.113,
+ 'timestamp': 1609304400,
+ 'upload_date': '20201230',
+ 'creator': 'HGTV',
+ 'series': 'Property Brothers: Forever Home',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'skip': 'Available for Premium users',
+ }]
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0'
+
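+    # discoveryplus.com uses the v3 playback endpoint, which takes the video id
+    # in a JSON POST body instead of the URL path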
+ def _download_video_playback_info(self, disco_base, video_id, headers):
+ return self._download_json(
+ disco_base + 'playback/v3/videoPlaybackInfo',
+ video_id, headers=headers, data=json.dumps({
+ 'deviceInfo': {
+ 'adBlocker': False,
+ },
+ 'videoId': video_id,
+ 'wisteriaProperties': {
+ 'platform': 'desktop',
+ 'product': 'dplus_us',
+ },
+ }).encode('utf-8'))['data']['attributes']['streaming']
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ return self._get_disco_api_info(
+ url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us')
+
+
+class HGTVDeIE(DPlayIE):
+ _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
+ 'info_dict': {
+ 'id': '151205',
+ 'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
+ 'ext': 'mp4',
+ 'title': 'Wer braucht schon eine Toilette',
+ 'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
+ 'duration': 1177.024,
+ 'timestamp': 1595705400,
+ 'upload_date': '20200725',
+ 'creator': 'HGTV',
+ 'series': 'Tiny House - klein, aber oho',
+ 'season_number': 3,
+ 'episode_number': 3,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ return self._get_disco_api_info(
+ url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
diff --git a/hypervideo_dl/extractor/drbonanza.py b/hypervideo_dl/extractor/drbonanza.py
new file mode 100644
index 0000000..164e97c
--- /dev/null
+++ b/hypervideo_dl/extractor/drbonanza.py
@@ -0,0 +1,59 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ parse_duration,
+ unescapeHTML,
+)
+
+
+class DRBonanzaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/[^/]+/\d+/[^/]+/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://www.dr.dk/bonanza/serie/154/matador/40312/matador---0824-komme-fremmede-',
+ 'info_dict': {
+ 'id': '40312',
+ 'display_id': 'matador---0824-komme-fremmede-',
+ 'ext': 'mp4',
+ 'title': 'MATADOR - 08:24. "Komme fremmede".',
+ 'description': 'md5:77b4c1ac4d4c1b9d610ab4395212ff84',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
+ 'duration': 4613,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id, display_id = mobj.group('id', 'display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ info = self._parse_html5_media_entries(
+ url, webpage, display_id, m3u8_id='hls',
+ m3u8_entry_protocol='m3u8_native')[0]
+ self._sort_formats(info['formats'])
+
+ asset = self._parse_json(
+ self._search_regex(
+ r'(?s)currentAsset\s*=\s*({.+?})\s*</script', webpage, 'asset'),
+ display_id, transform_source=js_to_json)
+
+ title = unescapeHTML(asset['AssetTitle']).strip()
+
+ def extract(field):
+ return self._search_regex(
+ r'<div[^>]+>\s*<p>%s:<p>\s*</div>\s*<div[^>]+>\s*<p>([^<]+)</p>' % field,
+ webpage, field, default=None)
+
+ info.update({
+ 'id': asset.get('AssetId') or video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': extract('Programinfo'),
+ 'duration': parse_duration(extract('Tid')),
+ 'thumbnail': asset.get('AssetImageUrl'),
+ })
+ return info
diff --git a/hypervideo_dl/extractor/dreisat.py b/hypervideo_dl/extractor/dreisat.py
new file mode 100644
index 0000000..5a07c18
--- /dev/null
+++ b/hypervideo_dl/extractor/dreisat.py
@@ -0,0 +1,43 @@
+from __future__ import unicode_literals
+
+from .zdf import ZDFIE
+
+
+class DreiSatIE(ZDFIE):
+ IE_NAME = '3sat'
+ _VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
+ _TESTS = [{
+ # Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
+ 'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
+ 'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
+ 'info_dict': {
+ 'id': '141007_ab18_10wochensommer_film',
+ 'ext': 'mp4',
+ 'title': 'Ab 18! - 10 Wochen Sommer',
+ 'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
+ 'duration': 2660,
+ 'timestamp': 1608604200,
+ 'upload_date': '20201222',
+ },
+ }, {
+ 'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
+ 'info_dict': {
+ 'id': '140913_sendung_schweizweit',
+ 'ext': 'mp4',
+ 'title': 'Waidmannsheil',
+ 'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
+ 'timestamp': 1410623100,
+ 'upload_date': '20140913'
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
+ 'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',
+ 'only_matching': True,
+ }, {
+ # Same as https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids
+ 'url': 'https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html',
+ 'only_matching': True,
+ }]
diff --git a/hypervideo_dl/extractor/dropbox.py b/hypervideo_dl/extractor/dropbox.py
new file mode 100644
index 0000000..2bedcc1
--- /dev/null
+++ b/hypervideo_dl/extractor/dropbox.py
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import os.path
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import url_basename
+
+
+class DropboxIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
+ _TESTS = [
+ {
+ 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
+ 'info_dict': {
+ 'id': 'nelirfsxnmcfbfh',
+ 'ext': 'mp4',
+ 'title': 'hypervideo test video \'ä"BaW_jenozKc'
+ }
+ }, {
+ 'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ fn = compat_urllib_parse_unquote(url_basename(url))
+ title = os.path.splitext(fn)[0]
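+        # share links render a preview page; swapping dl=0 for dl=1 requests
+        # the raw file instead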
+ video_url = re.sub(r'[?&]dl=0', '', url)
+ video_url += ('?' if '?' not in video_url else '&') + 'dl=1'
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ }
diff --git a/hypervideo_dl/extractor/drtuber.py b/hypervideo_dl/extractor/drtuber.py
new file mode 100644
index 0000000..2baea58
--- /dev/null
+++ b/hypervideo_dl/extractor/drtuber.py
@@ -0,0 +1,112 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ NO_DEFAULT,
+ parse_duration,
+ str_to_int,
+)
+
+
+class DrTuberIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
+ _TESTS = [{
+ 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
+ 'md5': '93e680cf2536ad0dfb7e74d94a89facd',
+ 'info_dict': {
+ 'id': '1740434',
+ 'display_id': 'hot-perky-blonde-naked-golf',
+ 'ext': 'mp4',
+ 'title': 'hot perky blonde naked golf',
+ 'like_count': int,
+ 'comment_count': int,
+ 'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'],
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'http://www.drtuber.com/embed/489939',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)',
+ webpage)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage = self._download_webpage(
+ 'http://www.drtuber.com/video/%s' % video_id, display_id)
+
+ video_data = self._download_json(
+ 'http://www.drtuber.com/player_config_json/', video_id, query={
+ 'vid': video_id,
+ 'embed': 0,
+ 'aid': 0,
+ 'domain_id': 0,
+ })
+
+ formats = []
+ for format_id, video_url in video_data['files'].items():
+ if video_url:
+ formats.append({
+ 'format_id': format_id,
+ 'quality': 2 if format_id == 'hq' else 1,
+ 'url': video_url
+ })
+ self._sort_formats(formats)
+
+ duration = int_or_none(video_data.get('duration')) or parse_duration(
+ video_data.get('duration_format'))
+
+ title = self._html_search_regex(
+ (r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
+ r'<title>([^<]+)\s*@\s+DrTuber',
+ r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
+ r'<p[^>]+class="title_substrate">([^<]+)</p>',
+ r'<title>([^<]+) - \d+'),
+ webpage, 'title')
+
+ thumbnail = self._html_search_regex(
+ r'poster="([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
+
+ def extract_count(id_, name, default=NO_DEFAULT):
+ return str_to_int(self._html_search_regex(
+ r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_,
+ webpage, '%s count' % name, default=default, fatal=False))
+
+ like_count = extract_count('rate_likes', 'like')
+ dislike_count = extract_count('rate_dislikes', 'dislike', default=None)
+ comment_count = extract_count('comments_count', 'comment')
+
+ cats_str = self._search_regex(
+ r'<div[^>]+class="categories_list">(.+?)</div>',
+ webpage, 'categories', fatal=False)
+ categories = [] if not cats_str else re.findall(
+ r'<a title="([^"]+)"', cats_str)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ 'age_limit': self._rta_search(webpage),
+ 'duration': duration,
+ }
diff --git a/hypervideo_dl/extractor/drtv.py b/hypervideo_dl/extractor/drtv.py
new file mode 100644
index 0000000..c0036ad
--- /dev/null
+++ b/hypervideo_dl/extractor/drtv.py
@@ -0,0 +1,355 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import binascii
+import hashlib
+import re
+
+from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ bytes_to_intlist,
+ ExtractorError,
+ int_or_none,
+ intlist_to_bytes,
+ float_or_none,
+ mimetype2ext,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+)
+
+
+class DRTVIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
+ (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
+ )
+ (?P<id>[\da-z_-]+)
+ '''
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['DK']
+ IE_NAME = 'drtv'
+ _TESTS = [{
+ 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
+ 'md5': '25e659cccc9a2ed956110a299fdf5983',
+ 'info_dict': {
+ 'id': 'klassen-darlig-taber-10',
+ 'ext': 'mp4',
+ 'title': 'Klassen - Dårlig taber (10)',
+ 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
+ 'timestamp': 1539085800,
+ 'upload_date': '20181009',
+ 'duration': 606.84,
+ 'series': 'Klassen',
+ 'season': 'Klassen I',
+ 'season_number': 1,
+ 'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b',
+ 'episode': 'Episode 10',
+ 'episode_number': 10,
+ 'release_year': 2016,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ # embed
+ 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
+ 'info_dict': {
+ 'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6',
+ 'ext': 'mp4',
+ 'title': 'christiania pusher street ryddes drdkrjpo',
+ 'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
+ 'timestamp': 1472800279,
+ 'upload_date': '20160902',
+ 'duration': 131.4,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ # with SignLanguage formats
+ 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
+ 'info_dict': {
+ 'id': 'historien-om-danmark-stenalder',
+ 'ext': 'mp4',
+ 'title': 'Historien om Danmark: Stenalder',
+ 'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
+ 'timestamp': 1546628400,
+ 'upload_date': '20190104',
+ 'duration': 3502.56,
+ 'formats': 'mincount:20',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769',
+ 'info_dict': {
+ 'id': '00951930010',
+ 'ext': 'mp4',
+ 'title': 'Bonderøven (1:8)',
+ 'description': 'md5:3cf18fc0d3b205745d4505f896af8121',
+ 'timestamp': 1546542000,
+ 'upload_date': '20190103',
+ 'duration': 2576.6,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dr.dk/drtv/program/jagten_220924',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ if '>Programmet er ikke længere tilgængeligt' in webpage:
+ raise ExtractorError(
+ 'Video %s is not available' % video_id, expected=True)
+
+ video_id = self._search_regex(
+ (r'data-(?:material-identifier|episode-slug)="([^"]+)"',
+ r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
+ webpage, 'video id', default=None)
+
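+        # newer pages do not expose the id in data attributes; fall back to
+        # the urn in the markup, then to the SPA state JSON and its
+        # production number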
+ if not video_id:
+ video_id = self._search_regex(
+ r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
+ webpage, 'urn', default=None)
+ if video_id:
+ video_id = compat_urllib_parse_unquote(video_id)
+
+ _PROGRAMCARD_BASE = 'https://www.dr.dk/mu-online/api/1.4/programcard'
+ query = {'expanded': 'true'}
+
+ if video_id:
+ programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id)
+ else:
+ programcard_url = _PROGRAMCARD_BASE
+ page = self._parse_json(
+ self._search_regex(
+ r'data\s*=\s*({.+?})\s*(?:;|</script)', webpage,
+ 'data'), '1')['cache']['page']
+ page = page[list(page.keys())[0]]
+ item = try_get(
+ page, (lambda x: x['item'], lambda x: x['entries'][0]['item']),
+ dict)
+ video_id = item['customId'].split(':')[-1]
+ query['productionnumber'] = video_id
+
+ data = self._download_json(
+ programcard_url, video_id, 'Downloading video JSON', query=query)
+
+ title = str_or_none(data.get('Title')) or re.sub(
+ r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
+ self._og_search_title(webpage))
+ description = self._og_search_description(
+ webpage, default=None) or data.get('Description')
+
+ timestamp = unified_timestamp(
+ data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime'))
+
+ thumbnail = None
+ duration = None
+
+ restricted_to_denmark = False
+
+ formats = []
+ subtitles = {}
+
+ assets = []
+ primary_asset = data.get('PrimaryAsset')
+ if isinstance(primary_asset, dict):
+ assets.append(primary_asset)
+ secondary_assets = data.get('SecondaryAssets')
+ if isinstance(secondary_assets, list):
+ for secondary_asset in secondary_assets:
+ if isinstance(secondary_asset, dict):
+ assets.append(secondary_asset)
+
+ def hex_to_bytes(hex):
+ return binascii.a2b_hex(hex.encode('ascii'))
+
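+        # EncryptedUri layout: chars 2-10 give the length n (in hex chars) of
+        # the ciphertext hex that follows at offset 10; the remainder is the
+        # IV in hex. The AES-CBC key is the SHA-256 digest of
+        # '<iv hex>:<static salt>'; PKCS#7 padding and any query string are
+        # stripped after decryption.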
+ def decrypt_uri(e):
+ n = int(e[2:10], 16)
+ a = e[10 + n:]
+ data = bytes_to_intlist(hex_to_bytes(e[10:10 + n]))
+ key = bytes_to_intlist(hashlib.sha256(
+ ('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest())
+ iv = bytes_to_intlist(hex_to_bytes(a))
+ decrypted = aes_cbc_decrypt(data, key, iv)
+ return intlist_to_bytes(
+ decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0]
+
+ for asset in assets:
+ kind = asset.get('Kind')
+ if kind == 'Image':
+ thumbnail = url_or_none(asset.get('Uri'))
+ elif kind in ('VideoResource', 'AudioResource'):
+ duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
+ restricted_to_denmark = asset.get('RestrictedToDenmark')
+ asset_target = asset.get('Target')
+ for link in asset.get('Links', []):
+ uri = link.get('Uri')
+ if not uri:
+ encrypted_uri = link.get('EncryptedUri')
+ if not encrypted_uri:
+ continue
+ try:
+ uri = decrypt_uri(encrypted_uri)
+ except Exception:
+ self.report_warning(
+ 'Unable to decrypt EncryptedUri', video_id)
+ continue
+ uri = url_or_none(uri)
+ if not uri:
+ continue
+ target = link.get('Target')
+ format_id = target or ''
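+                    # accessibility variants (spoken subtitles, sign language,
+                    # visual interpretation) are kept but ranked below the
+                    # default stream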
+ if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
+ preference = -1
+ format_id += '-%s' % asset_target
+ elif asset_target == 'Default':
+ preference = 1
+ else:
+ preference = None
+ if target == 'HDS':
+ f4m_formats = self._extract_f4m_formats(
+ uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
+ video_id, preference, f4m_id=format_id, fatal=False)
+ if kind == 'AudioResource':
+ for f in f4m_formats:
+ f['vcodec'] = 'none'
+ formats.extend(f4m_formats)
+ elif target == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ uri, video_id, 'mp4', entry_protocol='m3u8_native',
+ preference=preference, m3u8_id=format_id,
+ fatal=False))
+ else:
+ bitrate = link.get('Bitrate')
+ if bitrate:
+ format_id += '-%s' % bitrate
+ formats.append({
+ 'url': uri,
+ 'format_id': format_id,
+ 'tbr': int_or_none(bitrate),
+ 'ext': link.get('FileFormat'),
+ 'vcodec': 'none' if kind == 'AudioResource' else None,
+ 'preference': preference,
+ })
+ subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist')
+ if isinstance(subtitles_list, list):
+ LANGS = {
+ 'Danish': 'da',
+ }
+ for subs in subtitles_list:
+ if not isinstance(subs, dict):
+ continue
+ sub_uri = url_or_none(subs.get('Uri'))
+ if not sub_uri:
+ continue
+ lang = subs.get('Language') or 'da'
+ subtitles.setdefault(LANGS.get(lang, lang), []).append({
+ 'url': sub_uri,
+ 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
+ })
+
+ if not formats and restricted_to_denmark:
+ self.raise_geo_restricted(
+ 'Unfortunately, DR is not allowed to show this program outside Denmark.',
+ countries=self._GEO_COUNTRIES)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'series': str_or_none(data.get('SeriesTitle')),
+ 'season': str_or_none(data.get('SeasonTitle')),
+ 'season_number': int_or_none(data.get('SeasonNumber')),
+ 'season_id': str_or_none(data.get('SeasonUrn')),
+ 'episode': str_or_none(data.get('EpisodeTitle')),
+ 'episode_number': int_or_none(data.get('EpisodeNumber')),
+ 'release_year': int_or_none(data.get('ProductionYear')),
+ }
+
+
+class DRTVLiveIE(InfoExtractor):
+ IE_NAME = 'drtv:live'
+ _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
+ _GEO_COUNTRIES = ['DK']
+ _TEST = {
+ 'url': 'https://www.dr.dk/tv/live/dr1',
+ 'info_dict': {
+ 'id': 'dr1',
+ 'ext': 'mp4',
+ 'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ channel_data = self._download_json(
+ 'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id,
+ channel_id)
+ title = self._live_title(channel_data['Title'])
+
+ formats = []
+ for streaming_server in channel_data.get('StreamingServers', []):
+ server = streaming_server.get('Server')
+ if not server:
+ continue
+ link_type = streaming_server.get('LinkType')
+ for quality in streaming_server.get('Qualities', []):
+ for stream in quality.get('Streams', []):
+ stream_path = stream.get('Stream')
+ if not stream_path:
+ continue
+ stream_url = update_url_query(
+ '%s/%s' % (server, stream_path), {'b': ''})
+ if link_type == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, channel_id, 'mp4',
+ m3u8_id=link_type, fatal=False, live=True))
+ elif link_type == 'HDS':
+ formats.extend(self._extract_f4m_formats(update_url_query(
+ '%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}),
+ channel_id, f4m_id=link_type, fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': channel_id,
+ 'title': title,
+ 'thumbnail': channel_data.get('PrimaryImageUri'),
+ 'formats': formats,
+ 'is_live': True,
+ }
diff --git a/hypervideo_dl/extractor/dtube.py b/hypervideo_dl/extractor/dtube.py
new file mode 100644
index 0000000..114d2db
--- /dev/null
+++ b/hypervideo_dl/extractor/dtube.py
@@ -0,0 +1,83 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+from socket import timeout
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class DTubeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
+ _TEST = {
+ 'url': 'https://d.tube/#!/v/broncnutz/x380jtr1',
+ 'md5': '9f29088fa08d699a7565ee983f56a06e',
+ 'info_dict': {
+ 'id': 'x380jtr1',
+ 'ext': 'mp4',
+ 'title': 'Lefty 3-Rings is Back Baby!! NCAA Picks',
+ 'description': 'md5:60be222088183be3a42f196f34235776',
+ 'uploader_id': 'broncnutz',
+ 'upload_date': '20190107',
+ 'timestamp': 1546854054,
+ },
+ 'params': {
+ 'format': '480p',
+ },
+ }
+
+ def _real_extract(self, url):
+ uploader_id, video_id = re.match(self._VALID_URL, url).groups()
+ result = self._download_json('https://api.steemit.com/', video_id, data=json.dumps({
+ 'jsonrpc': '2.0',
+ 'method': 'get_content',
+ 'params': [uploader_id, video_id],
+ }).encode())['result']
+
+ metadata = json.loads(result['json_metadata'])
+ video = metadata['video']
+ content = video['content']
+ info = video.get('info', {})
+ title = info.get('title') or result['title']
+
+ def canonical_url(h):
+ if not h:
+ return None
+ return 'https://video.dtube.top/ipfs/' + h
+
+ formats = []
+ for q in ('240', '480', '720', '1080', ''):
+ video_url = canonical_url(content.get('video%shash' % q))
+ if not video_url:
+ continue
+ format_id = (q + 'p') if q else 'Source'
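+            # probe each format URL against the IPFS gateway (5 s timeout)
+            # and skip hashes that do not respond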
+ try:
+ self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
+ self._downloader._opener.open(video_url, timeout=5).close()
+ except timeout:
+ self.to_screen(
+ '%s: %s URL is invalid, skipping' % (video_id, format_id))
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': video_url,
+ 'height': int_or_none(q),
+ 'ext': 'mp4',
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': content.get('description'),
+ 'thumbnail': canonical_url(info.get('snaphash')),
+ 'tags': content.get('tags') or metadata.get('tags'),
+ 'duration': info.get('duration'),
+ 'formats': formats,
+ 'timestamp': parse_iso8601(result.get('created')),
+ 'uploader_id': uploader_id,
+ }
diff --git a/hypervideo_dl/extractor/dumpert.py b/hypervideo_dl/extractor/dumpert.py
new file mode 100644
index 0000000..d9d9afd
--- /dev/null
+++ b/hypervideo_dl/extractor/dumpert.py
@@ -0,0 +1,80 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ qualities,
+)
+
+
+class DumpertIE(InfoExtractor):
+ _VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
+ 'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
+ 'info_dict': {
+ 'id': '6646981/951bc60f',
+ 'ext': 'mp4',
+ 'title': 'Ik heb nieuws voor je',
+ 'description': 'Niet schrikken hoor',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://legacy.dumpert.nl/mediabase/6646981/951bc60f',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
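+        # ids look like 6646981_951bc60f on the current site; keep the legacy
+        # slash form for display and convert back to underscores for the API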
+ video_id = self._match_id(url).replace('_', '/')
+ item = self._download_json(
+ 'http://api-live.dumpert.nl/mobile_api/json/info/' + video_id.replace('/', '_'),
+ video_id)['items'][0]
+ title = item['title']
+ media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO')
+
+ quality = qualities(['flv', 'mobile', 'tablet', '720p'])
+ formats = []
+ for variant in media.get('variants', []):
+ uri = variant.get('uri')
+ if not uri:
+ continue
+ version = variant.get('version')
+ formats.append({
+ 'url': uri,
+ 'format_id': version,
+ 'quality': quality(version),
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ stills = item.get('stills') or {}
+ for t in ('thumb', 'still'):
+ for s in ('', '-medium', '-large'):
+ still_id = t + s
+ still_url = stills.get(still_id)
+ if not still_url:
+ continue
+ thumbnails.append({
+ 'id': still_id,
+ 'url': still_url,
+ })
+
+ stats = item.get('stats') or {}
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': item.get('description'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'duration': int_or_none(media.get('duration')),
+ 'like_count': int_or_none(stats.get('kudos_total')),
+ 'view_count': int_or_none(stats.get('views_total')),
+ }
diff --git a/hypervideo_dl/extractor/dvtv.py b/hypervideo_dl/extractor/dvtv.py
new file mode 100644
index 0000000..de7f6d6
--- /dev/null
+++ b/hypervideo_dl/extractor/dvtv.py
@@ -0,0 +1,184 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+    parse_iso8601,
+    try_get,
+    unescapeHTML,
+)
+
+
+class DVTVIE(InfoExtractor):
+ IE_NAME = 'dvtv'
+ IE_DESC = 'http://video.aktualne.cz/'
+ _VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
+ _TESTS = [{
+ 'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
+ 'md5': '67cb83e4a955d36e1b5d31993134a0c2',
+ 'info_dict': {
+ 'id': 'dc0768de855511e49e4b0025900fea04',
+ 'ext': 'mp4',
+ 'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
+ 'duration': 1484,
+ 'upload_date': '20141217',
+ 'timestamp': 1418792400,
+ }
+ }, {
+ 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
+ 'info_dict': {
+ 'title': r'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
+ 'id': '973eb3bc854e11e498be002590604f2e',
+ },
+ 'playlist': [{
+ 'md5': 'da7ca6be4935532241fa9520b3ad91e4',
+ 'info_dict': {
+ 'id': 'b0b40906854d11e4bdad0025900fea04',
+ 'ext': 'mp4',
+ 'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne',
+ 'description': 'md5:0916925dea8e30fe84222582280b47a0',
+ 'timestamp': 1418760010,
+ 'upload_date': '20141216',
+ }
+ }, {
+ 'md5': '5f7652a08b05009c1292317b449ffea2',
+ 'info_dict': {
+ 'id': '420ad9ec854a11e4bdad0025900fea04',
+ 'ext': 'mp4',
+ 'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka',
+ 'description': 'md5:ff2f9f6de73c73d7cef4f756c1c1af42',
+ 'timestamp': 1418760010,
+ 'upload_date': '20141216',
+ }
+ }, {
+ 'md5': '498eb9dfa97169f409126c617e2a3d64',
+ 'info_dict': {
+ 'id': '95d35580846a11e4b6d20025900fea04',
+ 'ext': 'mp4',
+ 'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?',
+ 'description': 'md5:889fe610a70fee5511dc3326a089188e',
+ 'timestamp': 1418760010,
+ 'upload_date': '20141216',
+ }
+ }, {
+ 'md5': 'b8dc6b744844032dab6ba3781a7274b9',
+ 'info_dict': {
+ 'id': '6fe14d66853511e4833a0025900fea04',
+ 'ext': 'mp4',
+ 'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády',
+ 'description': 'md5:544f86de6d20c4815bea11bf2ac3004f',
+ 'timestamp': 1418760010,
+ 'upload_date': '20141216',
+ }
+ }],
+ }, {
+ 'url': 'https://video.aktualne.cz/dvtv/zeman-si-jen-leci-mindraky-sobotku-nenavidi-a-babis-se-mu-te/r~960cdb3a365a11e7a83b0025900fea04/',
+ 'md5': 'f8efe9656017da948369aa099788c8ea',
+ 'info_dict': {
+ 'id': '3c496fec365911e7a6500025900fea04',
+ 'ext': 'mp4',
+ 'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta',
+ 'duration': 1103,
+ 'upload_date': '20170511',
+ 'timestamp': 1494514200,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
+ 'only_matching': True,
+ }, {
+ # Test live stream video (liveStarter) parsing
+ 'url': 'https://video.aktualne.cz/dvtv/zive-mistryne-sveta-eva-samkova-po-navratu-ze-sampionatu/r~182654c2288811e990fd0cc47ab5f122/',
+ 'md5': '2e552e483f2414851ca50467054f9d5d',
+ 'info_dict': {
+ 'id': '8d116360288011e98c840cc47ab5f122',
+ 'ext': 'mp4',
+ 'title': 'Živě: Mistryně světa Eva Samková po návratu ze šampionátu',
+ 'upload_date': '20190204',
+ 'timestamp': 1549289591,
+ },
+ 'params': {
+ # Video content is no longer available
+ 'skip_download': True,
+ },
+ }]
+
+ def _parse_video_metadata(self, js, video_id, timestamp):
+ data = self._parse_json(js, video_id, transform_source=js_to_json)
+ title = unescapeHTML(data['title'])
+
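+        # recordings of live streams carry their media data in the
+        # liveStarter plugin block; merge it over the defaults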
+ live_starter = try_get(data, lambda x: x['plugins']['liveStarter'], dict)
+ if live_starter:
+ data.update(live_starter)
+
+ formats = []
+ for tracks in data.get('tracks', {}).values():
+ for video in tracks:
+ video_url = video.get('src')
+ if not video_url:
+ continue
+ video_type = video.get('type')
+ ext = determine_ext(video_url, mimetype2ext(video_type))
+ if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif video_type == 'application/dash+xml' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ label = video.get('label')
+ height = self._search_regex(
+ r'^(\d+)[pP]', label or '', 'height', default=None)
+ format_id = ['http']
+ for f in (ext, label):
+ if f:
+ format_id.append(f)
+ formats.append({
+ 'url': video_url,
+ 'format_id': '-'.join(format_id),
+ 'height': int_or_none(height),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': data.get('mediaid') or video_id,
+ 'title': title,
+ 'description': data.get('description'),
+ 'thumbnail': data.get('image'),
+ 'duration': int_or_none(data.get('duration')),
+ 'timestamp': int_or_none(timestamp),
+ 'formats': formats
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'article:published_time', webpage, 'published time', default=None))
+
+ items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage)
+ if items:
+ return self.playlist_result(
+ [self._parse_video_metadata(i, video_id, timestamp) for i in items],
+ video_id, self._html_search_meta('twitter:title', webpage))
+
+ item = self._search_regex(
+ r'(?s)BBXPlayer\.setup\((.+?)\);',
+ webpage, 'video', default=None)
+ if item:
+            # strip wrapping function calls (e.g. htmldeentitize)
+            # TODO: this should be fixed in a general way in js_to_json
+ item = re.sub(r'\w+?\((.+)\)', r'\1', item)
+ return self._parse_video_metadata(item, video_id, timestamp)
+
+        raise ExtractorError('Could not find either video or playlist')
diff --git a/hypervideo_dl/extractor/dw.py b/hypervideo_dl/extractor/dw.py
new file mode 100644
index 0000000..d740652
--- /dev/null
+++ b/hypervideo_dl/extractor/dw.py
@@ -0,0 +1,108 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unified_strdate,
+)
+from ..compat import compat_urlparse
+
+
+class DWIE(InfoExtractor):
+ IE_NAME = 'dw'
+ _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
+ _TESTS = [{
+ # video
+ 'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
+ 'md5': '7372046e1815c5a534b43f3c3c36e6e9',
+ 'info_dict': {
+ 'id': '19112290',
+ 'ext': 'mp4',
+ 'title': 'Intelligent light',
+ 'description': 'md5:90e00d5881719f2a6a5827cb74985af1',
+ 'upload_date': '20160311',
+ }
+ }, {
+ # audio
+ 'url': 'http://www.dw.com/en/worldlink-my-business/av-19111941',
+ 'md5': '2814c9a1321c3a51f8a7aeb067a360dd',
+ 'info_dict': {
+ 'id': '19111941',
+ 'ext': 'mp3',
+ 'title': 'WorldLink: My business',
+ 'description': 'md5:bc9ca6e4e063361e21c920c53af12405',
+ 'upload_date': '20160311',
+ }
+ }, {
+        # DW documentaries are only available for one or two weeks
+ 'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798',
+ 'md5': '56b6214ef463bfb9a3b71aeb886f3cf1',
+ 'info_dict': {
+ 'id': '19274438',
+ 'ext': 'mp4',
+ 'title': 'Welcome to the 90s – Hip Hop',
+ 'description': 'Welcome to the 90s - The Golden Decade of Hip Hop',
+ 'upload_date': '20160521',
+ },
+ 'skip': 'Video removed',
+ }]
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ webpage = self._download_webpage(url, media_id)
+ hidden_inputs = self._hidden_inputs(webpage)
+ title = hidden_inputs['media_title']
+ media_id = hidden_inputs.get('media_id') or media_id
+
+ if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
+ formats = self._extract_smil_formats(
+ 'http://www.dw.com/smil/v-%s' % media_id, media_id,
+ transform_source=lambda s: s.replace(
+ 'rtmp://tv-od.dw.de/flash/',
+ 'http://tv-download.dw.de/dwtv_video/flv/'))
+ self._sort_formats(formats)
+ else:
+ formats = [{'url': hidden_inputs['file_name']}]
+
+ upload_date = hidden_inputs.get('display_date')
+ if not upload_date:
+ upload_date = self._html_search_regex(
+ r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage,
+ 'upload date', default=None)
+ upload_date = unified_strdate(upload_date)
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': hidden_inputs.get('preview_image'),
+ 'duration': int_or_none(hidden_inputs.get('file_duration')),
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
+
+
+class DWArticleIE(InfoExtractor):
+ IE_NAME = 'dw:article'
+ _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+a-(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.dw.com/en/no-hope-limited-options-for-refugees-in-idomeni/a-19111009',
+ 'md5': '8ca657f9d068bbef74d6fc38b97fc869',
+ 'info_dict': {
+ 'id': '19105868',
+ 'ext': 'mp4',
+ 'title': 'The harsh life of refugees in Idomeni',
+ 'description': 'md5:196015cc7e48ebf474db9399420043c7',
+ 'upload_date': '20160310',
+ }
+ }
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+ hidden_inputs = self._hidden_inputs(webpage)
+ media_id = hidden_inputs['media_id']
+ media_path = self._search_regex(r'href="([^"]+av-%s)"\s+class="overlayLink"' % media_id, webpage, 'media url')
+ media_url = compat_urlparse.urljoin(url, media_path)
+ return self.url_result(media_url, 'DW', media_id)
diff --git a/hypervideo_dl/extractor/eagleplatform.py b/hypervideo_dl/extractor/eagleplatform.py
new file mode 100644
index 0000000..36fef07
--- /dev/null
+++ b/hypervideo_dl/extractor/eagleplatform.py
@@ -0,0 +1,206 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ unsmuggle_url,
+ url_or_none,
+)
+
+
+class EaglePlatformIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ eagleplatform:(?P<custom_host>[^/]+):|
+ https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ # http://lenta.ru/news/2015/03/06/navalny/
+ 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
+ # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
+ 'info_dict': {
+ 'id': '227304',
+ 'ext': 'mp4',
+ 'title': 'Навальный вышел на свободу',
+ 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 87,
+ 'view_count': int,
+ 'age_limit': 0,
+ },
+ }, {
+ # http://muz-tv.ru/play/7129/
+ # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
+ 'url': 'eagleplatform:media.clipyou.ru:12820',
+ 'md5': '358597369cf8ba56675c1df15e7af624',
+ 'info_dict': {
+ 'id': '12820',
+ 'ext': 'mp4',
+ 'title': "'O Sole Mio",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 216,
+ 'view_count': int,
+ },
+ 'skip': 'Georestricted',
+ }, {
+        # referrer-protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/)
+ 'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ # Regular iframe embedding
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
+ webpage)
+ if mobj is not None:
+ return mobj.group('url')
+ PLAYER_JS_RE = r'''
+ <script[^>]+
+ src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs)
+ .+?
+ '''
+ # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/)
+ mobj = re.search(
+ r'''(?xs)
+ %s
+ <div[^>]+
+ class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+
+ data-id=["\'](?P<id>\d+)
+ ''' % PLAYER_JS_RE, webpage)
+ if mobj is not None:
+ return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
+ # Generalization of "Javascript code usage", "Combined usage" and
+ # "Usage without attaching to DOM" embeddings (see
+ # http://dultonmedia.github.io/eplayer/)
+ mobj = re.search(
+ r'''(?xs)
+ %s
+ <script>
+ .+?
+ new\s+EaglePlayer\(
+ (?:[^,]+\s*,\s*)?
+ {
+ .+?
+ \bid\s*:\s*["\']?(?P<id>\d+)
+ .+?
+ }
+ \s*\)
+ .+?
+ </script>
+ ''' % PLAYER_JS_RE, webpage)
+ if mobj is not None:
+ return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
+
+ @staticmethod
+ def _handle_error(response):
+ status = int_or_none(response.get('status', 200))
+ if status != 200:
+ raise ExtractorError(' '.join(response['errors']), expected=True)
+
+ def _download_json(self, url_or_request, video_id, *args, **kwargs):
+ try:
+ response = super(EaglePlatformIE, self)._download_json(
+ url_or_request, video_id, *args, **kwargs)
+ except ExtractorError as ee:
+ if isinstance(ee.cause, compat_HTTPError):
+ response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
+ self._handle_error(response)
+ raise
+ return response
+
+ def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
+ return self._download_json(url_or_request, video_id, note)['data'][0]
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ mobj = re.match(self._VALID_URL, url)
+ host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
+
+ headers = {}
+ query = {
+ 'id': video_id,
+ }
+
+ referrer = smuggled_data.get('referrer')
+ if referrer:
+ headers['Referer'] = referrer
+ query['referrer'] = referrer
+
+ player_data = self._download_json(
+ 'http://%s/api/player_data' % host, video_id,
+ headers=headers, query=query)
+
+ media = player_data['data']['playlist']['viewports'][0]['medialist'][0]
+
+ title = media['title']
+ description = media.get('description')
+ thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:')
+ duration = int_or_none(media.get('duration'))
+ view_count = int_or_none(media.get('views'))
+
+ age_restriction = media.get('age_restriction')
+ age_limit = None
+ if age_restriction:
+ age_limit = 0 if age_restriction == 'allow_all' else 18
+
+ secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')
+
+ formats = []
+
+ m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ formats.extend(m3u8_formats)
+
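+        # index the HLS variants by height so the direct-HTTP mp4 formats
+        # below can inherit their metadata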
+ m3u8_formats_dict = {}
+ for f in m3u8_formats:
+ if f.get('height') is not None:
+ m3u8_formats_dict[f['height']] = f
+
+ mp4_data = self._download_json(
+ # Secure mp4 URL is constructed according to Player.prototype.mp4 from
+ # http://lentaru.media.eagleplatform.com/player/player.js
+ re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
+ video_id, 'Downloading mp4 JSON', fatal=False)
+ if mp4_data:
+ for format_id, format_url in mp4_data.get('data', {}).items():
+ if not url_or_none(format_url):
+ continue
+ height = int_or_none(format_id)
+ if height is not None and m3u8_formats_dict.get(height):
+ f = m3u8_formats_dict[height].copy()
+ f.update({
+ 'format_id': f['format_id'].replace('hls', 'http'),
+ 'protocol': 'http',
+ })
+ else:
+ f = {
+ 'format_id': 'http-%s' % format_id,
+ 'height': int_or_none(format_id),
+ }
+ f['url'] = format_url
+ formats.append(f)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/ebaumsworld.py b/hypervideo_dl/extractor/ebaumsworld.py
new file mode 100644
index 0000000..c97682c
--- /dev/null
+++ b/hypervideo_dl/extractor/ebaumsworld.py
@@ -0,0 +1,33 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class EbaumsWorldIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ebaumsworld\.com/videos/[^/]+/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.ebaumsworld.com/videos/a-giant-python-opens-the-door/83367677/',
+ 'info_dict': {
+ 'id': '83367677',
+ 'ext': 'mp4',
+ 'title': 'A Giant Python Opens The Door',
+ 'description': 'This is how nightmares start...',
+ 'uploader': 'jihadpizza',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ config = self._download_xml(
+ 'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
+ video_url = config.find('file').text
+
+ return {
+ 'id': video_id,
+ 'title': config.find('title').text,
+ 'url': video_url,
+ 'description': config.find('description').text,
+ 'thumbnail': config.find('image').text,
+ 'uploader': config.find('username').text,
+ }
diff --git a/hypervideo_dl/extractor/echomsk.py b/hypervideo_dl/extractor/echomsk.py
new file mode 100644
index 0000000..6b7cc65
--- /dev/null
+++ b/hypervideo_dl/extractor/echomsk.py
@@ -0,0 +1,46 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class EchoMskIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.echo.msk.ru/sounds/1464134.html',
+ 'md5': '2e44b3b78daff5b458e4dbc37f191f7c',
+ 'info_dict': {
+ 'id': '1464134',
+ 'ext': 'mp3',
+ 'title': 'Особое мнение - 29 декабря 2014, 19:08',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ audio_url = self._search_regex(
+ r'<a rel="mp3" href="([^"]+)">', webpage, 'audio URL')
+
+ title = self._html_search_regex(
+ r'<a href="/programs/[^"]+" target="_blank">([^<]+)</a>',
+ webpage, 'title')
+
+ air_date = self._html_search_regex(
+ r'(?s)<div class="date">(.+?)</div>',
+ webpage, 'date', fatal=False, default=None)
+
+        if air_date:
+            air_date = re.sub(r'(\s)\1+', r'\1', air_date)
+            title = '%s - %s' % (title, air_date)
+
+ return {
+ 'id': video_id,
+ 'url': audio_url,
+ 'title': title,
+ }
diff --git a/hypervideo_dl/extractor/egghead.py b/hypervideo_dl/extractor/egghead.py
new file mode 100644
index 0000000..aff9b88
--- /dev/null
+++ b/hypervideo_dl/extractor/egghead.py
@@ -0,0 +1,134 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class EggheadBaseIE(InfoExtractor):
+ def _call_api(self, path, video_id, resource, fatal=True):
+ return self._download_json(
+ 'https://app.egghead.io/api/v1/' + path,
+ video_id, 'Downloading %s JSON' % resource, fatal=fatal)
+
+
+class EggheadCourseIE(EggheadBaseIE):
+ IE_DESC = 'egghead.io course'
+ IE_NAME = 'egghead:course'
+ _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
+ 'playlist_count': 29,
+ 'info_dict': {
+ 'id': '72',
+ 'title': 'Professor Frisby Introduces Composable Functional JavaScript',
+ 'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
+ },
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ series_path = 'series/' + playlist_id
+ lessons = self._call_api(
+ series_path + '/lessons', playlist_id, 'course lessons')
+
+ entries = []
+ for lesson in lessons:
+ lesson_url = url_or_none(lesson.get('http_url'))
+ if not lesson_url:
+ continue
+ lesson_id = lesson.get('id')
+ if lesson_id:
+ lesson_id = compat_str(lesson_id)
+ entries.append(self.url_result(
+ lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
+
+ course = self._call_api(
+ series_path, playlist_id, 'course', False) or {}
+
+ playlist_id = course.get('id')
+ if playlist_id:
+ playlist_id = compat_str(playlist_id)
+
+ return self.playlist_result(
+ entries, playlist_id, course.get('title'),
+ course.get('description'))
+
+
+class EggheadLessonIE(EggheadBaseIE):
+ IE_DESC = 'egghead.io lesson'
+ IE_NAME = 'egghead:lesson'
+ _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
+ 'info_dict': {
+ 'id': '1196',
+ 'display_id': 'javascript-linear-data-flow-with-container-style-types-box',
+ 'ext': 'mp4',
+ 'title': 'Create linear data flow with container style types (Box)',
+ 'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
+ 'thumbnail': r're:^https?:.*\.jpg$',
+ 'timestamp': 1481296768,
+ 'upload_date': '20161209',
+ 'duration': 304,
+ 'view_count': 0,
+ 'tags': 'count:2',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'format': 'bestvideo',
+ },
+ }, {
+ 'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ lesson = self._call_api(
+ 'lessons/' + display_id, display_id, 'lesson')
+
+ lesson_id = compat_str(lesson['id'])
+ title = lesson['title']
+
+ formats = []
+        for format_url in lesson['media_urls'].values():
+ format_url = url_or_none(format_url)
+ if not format_url:
+ continue
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, lesson_id, 'mp4', entry_protocol='m3u8',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, lesson_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': lesson_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': lesson.get('summary'),
+ 'thumbnail': lesson.get('thumb_nail'),
+ 'timestamp': unified_timestamp(lesson.get('published_at')),
+ 'duration': int_or_none(lesson.get('duration')),
+ 'view_count': int_or_none(lesson.get('plays_count')),
+ 'tags': try_get(lesson, lambda x: x['tag_list'], list),
+ 'series': try_get(
+ lesson, lambda x: x['series']['title'], compat_str),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/ehow.py b/hypervideo_dl/extractor/ehow.py
new file mode 100644
index 0000000..b1cd4f5
--- /dev/null
+++ b/hypervideo_dl/extractor/ehow.py
@@ -0,0 +1,38 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class EHowIE(InfoExtractor):
+ IE_NAME = 'eHow'
+ _VALID_URL = r'https?://(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
+ 'md5': '9809b4e3f115ae2088440bcb4efbf371',
+ 'info_dict': {
+ 'id': '12245069',
+ 'ext': 'flv',
+ 'title': 'Hardwood Flooring Basics',
+ 'description': 'Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...',
+ 'uploader': 'Erick Nathan',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ video_url = self._search_regex(
+ r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')
+ final_url = compat_urllib_parse_unquote(video_url)
+ uploader = self._html_search_meta('uploader', webpage)
+ title = self._og_search_title(webpage).replace(' | eHow', '')
+
+ return {
+ 'id': video_id,
+ 'url': final_url,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage),
+ 'uploader': uploader,
+ }
diff --git a/hypervideo_dl/extractor/eighttracks.py b/hypervideo_dl/extractor/eighttracks.py
new file mode 100644
index 0000000..9b1e1ce
--- /dev/null
+++ b/hypervideo_dl/extractor/eighttracks.py
@@ -0,0 +1,164 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import random
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+)
+
+
+class EightTracksIE(InfoExtractor):
+ IE_NAME = '8tracks'
+ _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
+ _TEST = {
+ 'name': 'EightTracks',
+ 'url': 'http://8tracks.com/ytdl/youtube-dl-test-tracks-a',
+ 'info_dict': {
+ 'id': '1336550',
+ 'display_id': 'hypervideo-test-tracks-a',
+ 'description': "test chars: \"'/\\ä↭",
+ 'title': "hypervideo test tracks \"'/\\ä↭<>",
+ },
+ 'playlist': [
+ {
+ 'md5': '96ce57f24389fc8734ce47f4c1abcc55',
+ 'info_dict': {
+ 'id': '11885610',
+ 'ext': 'm4a',
+ 'title': "youtue-dl project<>\"' - hypervideo test track 1 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': '4ab26f05c1f7291ea460a3920be8021f',
+ 'info_dict': {
+ 'id': '11885608',
+ 'ext': 'm4a',
+ 'title': "hypervideo project - hypervideo test track 2 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': 'd30b5b5f74217410f4689605c35d1fd7',
+ 'info_dict': {
+ 'id': '11885679',
+ 'ext': 'm4a',
+ 'title': "hypervideo project as well - hypervideo test track 3 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': '4eb0a669317cd725f6bbd336a29f923a',
+ 'info_dict': {
+ 'id': '11885680',
+ 'ext': 'm4a',
+ 'title': "hypervideo project as well - hypervideo test track 4 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': '1893e872e263a2705558d1d319ad19e8',
+ 'info_dict': {
+ 'id': '11885682',
+ 'ext': 'm4a',
+ 'title': "PH - hypervideo test track 5 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': 'b673c46f47a216ab1741ae8836af5899',
+ 'info_dict': {
+ 'id': '11885683',
+ 'ext': 'm4a',
+ 'title': "PH - hypervideo test track 6 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': '1d74534e95df54986da7f5abf7d842b7',
+ 'info_dict': {
+ 'id': '11885684',
+ 'ext': 'm4a',
+ 'title': "phihag - hypervideo test track 7 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': 'f081f47af8f6ae782ed131d38b9cd1c0',
+ 'info_dict': {
+ 'id': '11885685',
+ 'ext': 'm4a',
+ 'title': "phihag - hypervideo test track 8 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ }
+ ]
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ data = self._parse_json(
+ self._search_regex(
+ r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'),
+ playlist_id)
+
+ session = str(random.randint(0, 1000000000))
+ mix_id = data['id']
+ track_count = data['tracks_count']
+ duration = data['duration']
+ avg_song_duration = float(duration) / track_count
+        # duration is sometimes negative; fall back to a predefined average
+ if avg_song_duration <= 0:
+ avg_song_duration = 300
+ first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
+ next_url = first_url
+ entries = []
+
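+        # tracks are handed out one at a time: /play returns the first track
+        # and each /next call (keyed by the previous track id) returns the
+        # following one; on failure, retry a few times, sleeping roughly one
+        # song length between attempts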
+ for i in range(track_count):
+ api_json = None
+ download_tries = 0
+
+ while api_json is None:
+ try:
+ api_json = self._download_webpage(
+ next_url, playlist_id,
+ note='Downloading song information %d/%d' % (i + 1, track_count),
+ errnote='Failed to download song information')
+ except ExtractorError:
+ if download_tries > 3:
+ raise
+ else:
+ download_tries += 1
+ self._sleep(avg_song_duration, playlist_id)
+
+ api_data = json.loads(api_json)
+ track_data = api_data['set']['track']
+ info = {
+ 'id': compat_str(track_data['id']),
+ 'url': track_data['track_file_stream_url'],
+ 'title': track_data['performer'] + ' - ' + track_data['name'],
+ 'raw_title': track_data['name'],
+ 'uploader_id': data['user']['login'],
+ 'ext': 'm4a',
+ }
+ entries.append(info)
+
+ next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (
+ session, mix_id, track_data['id'])
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'id': compat_str(mix_id),
+ 'display_id': playlist_id,
+ 'title': data.get('name'),
+ 'description': data.get('description'),
+ }
diff --git a/hypervideo_dl/extractor/einthusan.py b/hypervideo_dl/extractor/einthusan.py
new file mode 100644
index 0000000..4e0f8bc
--- /dev/null
+++ b/hypervideo_dl/extractor/einthusan.py
@@ -0,0 +1,111 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ extract_attributes,
+ ExtractorError,
+ get_elements_by_class,
+ urlencode_postdata,
+)
+
+
+class EinthusanIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://einthusan.tv/movie/watch/9097/',
+ 'md5': 'ff0f7f2065031b8a2cf13a933731c035',
+ 'info_dict': {
+ 'id': '9097',
+ 'ext': 'mp4',
+ 'title': 'Ae Dil Hai Mushkil',
+ 'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://einthusan.com/movie/watch/9097/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
+ 'only_matching': True,
+ }]
+
+ # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
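+    # the payload is base64 that has apparently been scrambled: two junk
+    # chars sit at positions 10-11 and the real char has been moved to the
+    # end of the string; restore it before decoding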
+ def _decrypt(self, encrypted_data, video_id):
+ return self._parse_json(compat_b64decode((
+ encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
+ )).decode('utf-8'), video_id)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
+
+ player_params = extract_attributes(self._search_regex(
+ r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
+
+ page_id = self._html_search_regex(
+ '<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
+ video_data = self._download_json(
+ 'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
+ data=urlencode_postdata({
+ 'xEvent': 'UIVideoPlayer.PingOutcome',
+ 'xJson': json.dumps({
+ 'EJOutcomes': player_params['data-ejpingables'],
+ 'NativeHLS': False
+ }),
+ 'arcVersion': 3,
+ 'appVersion': 59,
+ 'gorilla.csrf.Token': page_id,
+ }))['Data']
+
+ if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
+ raise ExtractorError(
+ 'Download rate reached. Please try again later.', expected=True)
+
+ ej_links = self._decrypt(video_data['EJLinks'], video_id)
+
+ formats = []
+
+ m3u8_url = ej_links.get('HLSLink')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
+
+ mp4_url = ej_links.get('MP4Link')
+ if mp4_url:
+ formats.append({
+ 'url': mp4_url,
+ })
+
+ self._sort_formats(formats)
+
+ description = get_elements_by_class('synopsis', webpage)[0]
+ thumbnail = self._html_search_regex(
+ r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
+ webpage, 'thumbnail url', fatal=False, group='url')
+ if thumbnail is not None:
+ thumbnail = compat_urlparse.urljoin(url, thumbnail)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ }
diff --git a/hypervideo_dl/extractor/eitb.py b/hypervideo_dl/extractor/eitb.py
new file mode 100644
index 0000000..ee5ead1
--- /dev/null
+++ b/hypervideo_dl/extractor/eitb.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ sanitized_Request,
+)
+
+
+class EitbIE(InfoExtractor):
+ IE_NAME = 'eitb.tv'
+ _VALID_URL = r'https?://(?:www\.)?eitb\.tv/(?:eu/bideoa|es/video)/[^/]+/\d+/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/4104995148001/4090227752001/lasa-y-zabala-30-anos/',
+ 'md5': 'edf4436247185adee3ea18ce64c47998',
+ 'info_dict': {
+ 'id': '4090227752001',
+ 'ext': 'mp4',
+ 'title': '60 minutos (Lasa y Zabala, 30 años)',
+ 'description': 'Programa de reportajes de actualidad.',
+ 'duration': 3996.76,
+ 'timestamp': 1381789200,
+ 'upload_date': '20131014',
+ 'tags': list,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://mam.eitb.eus/mam/REST/ServiceMultiweb/Video/MULTIWEBTV/%s/' % video_id,
+ video_id, 'Downloading video JSON')
+
+ media = video['web_media'][0]
+
+ formats = []
+ for rendition in media['RENDITIONS']:
+ video_url = rendition.get('PMD_URL')
+ if not video_url:
+ continue
+ tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
+ format_id = 'http'
+ if tbr:
+ format_id += '-%d' % int(tbr)
+ formats.append({
+ 'url': rendition['PMD_URL'],
+ 'format_id': format_id,
+ 'width': int_or_none(rendition.get('FRAME_WIDTH')),
+ 'height': int_or_none(rendition.get('FRAME_HEIGHT')),
+ 'tbr': tbr,
+ })
+
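+        # HLS URLs are token-protected; the token endpoint is Referer-gated
+        # and the result is appended as an hdnts query parameter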
+ hls_url = media.get('HLS_SURL')
+ if hls_url:
+ request = sanitized_Request(
+ 'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
+ headers={'Referer': url})
+ token_data = self._download_json(
+ request, video_id, 'Downloading auth token', fatal=False)
+ if token_data:
+ token = token_data.get('token')
+ if token:
+ formats.extend(self._extract_m3u8_formats(
+ '%s?hdnts=%s' % (hls_url, token), video_id, m3u8_id='hls', fatal=False))
+
+ hds_url = media.get('HDS_SURL')
+ if hds_url:
+ formats.extend(self._extract_f4m_formats(
+ '%s?hdcore=3.7.0' % hds_url.replace('euskalsvod', 'euskalvod'),
+ video_id, f4m_id='hds', fatal=False))
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': media.get('NAME_ES') or media.get('name') or media['NAME_EU'],
+ 'description': media.get('SHORT_DESC_ES') or video.get('desc_group') or media.get('SHORT_DESC_EU'),
+ 'thumbnail': media.get('STILL_URL') or media.get('THUMBNAIL_URL'),
+ 'duration': float_or_none(media.get('LENGTH'), 1000),
+ 'timestamp': parse_iso8601(media.get('BROADCST_DATE'), ' '),
+ 'tags': media.get('TAGS'),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/ellentube.py b/hypervideo_dl/extractor/ellentube.py
new file mode 100644
index 0000000..5444732
--- /dev/null
+++ b/hypervideo_dl/extractor/ellentube.py
@@ -0,0 +1,133 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ extract_attributes,
+ float_or_none,
+ int_or_none,
+ try_get,
+)
+
+
+class EllenTubeBaseIE(InfoExtractor):
+ def _extract_data_config(self, webpage, video_id):
+ details = self._search_regex(
+ r'(<[^>]+\bdata-component=(["\'])[Dd]etails.+?></div>)', webpage,
+ 'details')
+ return self._parse_json(
+ extract_attributes(details)['data-config'], video_id)
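+ # The page markup is assumed to contain something like the following
+ # (hypothetical example), whose data-config JSON is what gets parsed:
+ # <div data-component="Details" data-config='{"id": "...", ...}'></div>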
+
+ def _extract_video(self, data, video_id):
+ title = data['title']
+
+ formats = []
+ duration = None
+ for entry in data.get('media') or []:
+ if entry.get('id') == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ entry['url'], video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+ duration = int_or_none(entry.get('duration'))
+ break
+ self._sort_formats(formats)
+
+ def get_insight(kind):
+ return int_or_none(try_get(
+ data, lambda x: x['insight']['%ss' % kind]))
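+ # e.g. get_insight('view') reads data['insight']['views'] -- note the
+ # pluralizing '%ss' -- and tolerates missing keys via try_get.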
+
+ return {
+ 'extractor_key': EllenTubeIE.ie_key(),
+ 'id': video_id,
+ 'title': title,
+ 'description': data.get('description'),
+ 'duration': duration,
+ 'thumbnail': data.get('thumbnail'),
+ 'timestamp': float_or_none(data.get('publishTime'), scale=1000),
+ 'view_count': get_insight('view'),
+ 'like_count': get_insight('like'),
+ 'formats': formats,
+ }
+
+
+class EllenTubeIE(EllenTubeBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ ellentube:|
+ https://api-prod\.ellentube\.com/ellenapi/api/item/
+ )
+ (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
+ '''
+ _TESTS = [{
+ 'url': 'https://api-prod.ellentube.com/ellenapi/api/item/0822171c-3829-43bf-b99f-d77358ae75e3',
+ 'md5': '2fabc277131bddafdd120e0fc0f974c9',
+ 'info_dict': {
+ 'id': '0822171c-3829-43bf-b99f-d77358ae75e3',
+ 'ext': 'mp4',
+ 'title': 'Ellen Meets Las Vegas Survivors Jesus Campos and Stephen Schuck',
+ 'description': 'md5:76e3355e2242a78ad9e3858e5616923f',
+ 'thumbnail': r're:^https?://.+?',
+ 'duration': 514,
+ 'timestamp': 1508505120,
+ 'upload_date': '20171020',
+ 'view_count': int,
+ 'like_count': int,
+ }
+ }, {
+ 'url': 'ellentube:734a3353-f697-4e79-9ca9-bfc3002dc1e0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._download_json(
+ 'https://api-prod.ellentube.com/ellenapi/api/item/%s' % video_id,
+ video_id)
+ return self._extract_video(data, video_id)
+
+
+class EllenTubeVideoIE(EllenTubeBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?ellentube\.com/video/(?P<id>.+?)\.html'
+ _TEST = {
+ 'url': 'https://www.ellentube.com/video/ellen-meets-las-vegas-survivors-jesus-campos-and-stephen-schuck.html',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._extract_data_config(webpage, display_id)['id']
+ return self.url_result(
+ 'ellentube:%s' % video_id, ie=EllenTubeIE.ie_key(),
+ video_id=video_id)
+
+
+class EllenTubePlaylistIE(EllenTubeBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?ellentube\.com/(?:episode|studios)/(?P<id>.+?)\.html'
+ _TESTS = [{
+ 'url': 'https://www.ellentube.com/episode/dax-shepard-jordan-fisher-haim.html',
+ 'info_dict': {
+ 'id': 'dax-shepard-jordan-fisher-haim',
+ 'title': "Dax Shepard, 'DWTS' Team Jordan Fisher & Lindsay Arnold, HAIM",
+ 'description': 'md5:bfc982194dabb3f4e325e43aa6b2e21c',
+ },
+ 'playlist_count': 6,
+ }, {
+ 'url': 'https://www.ellentube.com/studios/macey-goes-rving0.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ data = self._extract_data_config(webpage, display_id)['data']
+ feed = self._download_json(
+ 'https://api-prod.ellentube.com/ellenapi/api/feed/?%s'
+ % data['filter'], display_id)
+ entries = [
+ self._extract_video(elem, elem['id'])
+ for elem in feed if elem.get('type') == 'VIDEO' and elem.get('id')]
+ return self.playlist_result(
+ entries, display_id, data.get('title'),
+ clean_html(data.get('description')))
diff --git a/hypervideo_dl/extractor/elpais.py b/hypervideo_dl/extractor/elpais.py
new file mode 100644
index 0000000..b89f6db
--- /dev/null
+++ b/hypervideo_dl/extractor/elpais.py
@@ -0,0 +1,95 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import strip_jsonp, unified_strdate
+
+
+class ElPaisIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
+ IE_DESC = 'El País'
+
+ _TESTS = [{
+ 'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
+ 'md5': '98406f301f19562170ec071b83433d55',
+ 'info_dict': {
+ 'id': 'tiempo-nuevo-recetas-viejas',
+ 'ext': 'mp4',
+ 'title': 'Tiempo nuevo, recetas viejas',
+ 'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
+ 'upload_date': '20140206',
+ }
+ }, {
+ 'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t',
+ 'md5': '3bd5b09509f3519d7d9e763179b013de',
+ 'info_dict': {
+ 'id': '1456340311_668921',
+ 'ext': 'mp4',
+ 'title': 'Cómo hacer el mejor café con cafetera italiana',
+ 'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
+ 'upload_date': '20160303',
+ }
+ }, {
+ 'url': 'http://elpais.com/elpais/2017/01/26/ciencia/1485456786_417876.html',
+ 'md5': '9c79923a118a067e1a45789e1e0b0f9c',
+ 'info_dict': {
+ 'id': '1485456786_417876',
+ 'ext': 'mp4',
+ 'title': 'Hallado un barco de la antigua Roma que naufragó en Baleares hace 1.800 años',
+ 'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas',
+ 'upload_date': '20170127',
+ },
+ }, {
+ 'url': 'http://epv.elpais.com/epv/2017/02/14/programa_la_voz_de_inaki/1487062137_075943.html',
+ 'info_dict': {
+ 'id': '1487062137_075943',
+ 'ext': 'mp4',
+ 'title': 'Disyuntivas',
+ 'description': 'md5:a0fb1485c4a6a8a917e6f93878e66218',
+ 'upload_date': '20170214',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ prefix = self._html_search_regex(
+ r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
+ id_multimedia = self._search_regex(
+ r"id_multimedia\s*=\s*'([^']+)'", webpage, 'ID multimedia', default=None)
+ if id_multimedia:
+ url_info = self._download_json(
+ 'http://elpais.com/vdpep/1/?pepid=' + id_multimedia, video_id, transform_source=strip_jsonp)
+ video_suffix = url_info['mp4']
+ else:
+ video_suffix = self._search_regex(
+ r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
+ video_url = prefix + video_suffix
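+ # The page defines a shared base in JS, e.g. (hypothetical value)
+ # var url_cache = "https://ep01.epimg.net/"; each video contributes
+ # only a suffix, so the final media URL is the concatenation above.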
+ thumbnail_suffix = self._search_regex(
+ r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
+ webpage, 'thumbnail URL', default=None)
+ thumbnail = (
+ None if thumbnail_suffix is None
+ else prefix + thumbnail_suffix) or self._og_search_thumbnail(webpage)
+ title = self._html_search_regex(
+ (r"tituloVideo\s*=\s*'([^']+)'",
+ r'<h2 class="entry-header entry-title.*?>(.*?)</h2>',
+ r'<h1[^>]+class="titulo"[^>]*>([^<]+)'),
+ webpage, 'title', default=None) or self._og_search_title(webpage)
+ upload_date = unified_strdate(self._search_regex(
+ r'<p class="date-header date-int updated"\s+title="([^"]+)">',
+ webpage, 'upload date', default=None) or self._html_search_meta(
+ 'datePublished', webpage, 'timestamp'))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ }
diff --git a/hypervideo_dl/extractor/embedly.py b/hypervideo_dl/extractor/embedly.py
new file mode 100644
index 0000000..a5820b2
--- /dev/null
+++ b/hypervideo_dl/extractor/embedly.py
@@ -0,0 +1,16 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class EmbedlyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|cdn)\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)'
+ _TESTS = [{
+ 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))
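+
+# The widget URL carries its real target percent-encoded in the `url`
+# query parameter (the "id" group above); decoding it and returning a
+# url_result delegates extraction to whichever extractor matches the
+# decoded URL, e.g. the YouTube watch URL in the test case above.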
diff --git a/hypervideo_dl/extractor/engadget.py b/hypervideo_dl/extractor/engadget.py
new file mode 100644
index 0000000..65635c1
--- /dev/null
+++ b/hypervideo_dl/extractor/engadget.py
@@ -0,0 +1,27 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class EngadgetIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P<id>[^/?#]+)'
+
+ _TESTS = [{
+ # video with 5min ID
+ 'url': 'http://www.engadget.com/video/518153925/',
+ 'md5': 'c6820d4828a5064447a4d9fc73f312c9',
+ 'info_dict': {
+ 'id': '518153925',
+ 'ext': 'mp4',
+ 'title': 'Samsung Galaxy Tab Pro 8.4 Review',
+ },
+ 'add_ie': ['FiveMin'],
+ }, {
+ # video with vidible ID
+ 'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result('aol-video:%s' % video_id)
diff --git a/hypervideo_dl/extractor/eporner.py b/hypervideo_dl/extractor/eporner.py
new file mode 100644
index 0000000..bfecd3a
--- /dev/null
+++ b/hypervideo_dl/extractor/eporner.py
@@ -0,0 +1,132 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ encode_base_n,
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+ parse_duration,
+ str_to_int,
+ url_or_none,
+)
+
+
+class EpornerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:(?:hd-porn|embed)/|video-)(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
+ _TESTS = [{
+ 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
+ 'md5': '39d486f046212d8e1b911c52ab4691f8',
+ 'info_dict': {
+ 'id': 'qlDUmNsj6VS',
+ 'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
+ 'ext': 'mp4',
+ 'title': 'Infamous Tiffany Teen Strip Tease Video',
+ 'description': 'md5:764f39abf932daafa37485eb46efa152',
+ 'timestamp': 1232520922,
+ 'upload_date': '20090121',
+ 'duration': 1838,
+ 'view_count': int,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'proxy': '127.0.0.1:8118'
+ }
+ }, {
+ # New (May 2016) URL layout
+ 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.eporner.com/embed/3YRUtzMcWn0',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.eporner.com/video-FJsA19J3Y3H/one-of-the-greats/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage, urlh = self._download_webpage_handle(url, display_id)
+
+ video_id = self._match_id(urlh.geturl())
+
+ video_hash = self._search_regex(
+ r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')
+
+ title = self._og_search_title(webpage, default=None) or self._html_search_regex(
+ r'<title>(.+?) - EPORNER', webpage, 'title')
+
+ # Reverse engineered from vjs.js
+ def calc_hash(s):
+ return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8)))
+
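+ # Worked example with a made-up hash: each 8-hex-digit chunk is parsed
+ # as an integer and re-encoded in base 36, then the pieces are
+ # concatenated:
+ # calc_hash('00000010' * 4) == encode_base_n(16, 36) * 4 == 'gggg'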
+ video = self._download_json(
+ 'http://www.eporner.com/xhr/video/%s' % video_id,
+ display_id, note='Downloading video JSON',
+ query={
+ 'hash': calc_hash(video_hash),
+ 'device': 'generic',
+ 'domain': 'www.eporner.com',
+ 'fallback': 'false',
+ })
+
+ if video.get('available') is False:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, video['message']), expected=True)
+
+ sources = video['sources']
+
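+ # `sources` is assumed (from the loop below) to be nested dicts of the
+ # form {kind: {format_id: {'src': url, ...}}}, e.g. (hypothetical):
+ # {'mp4': {'720p': {'src': '...'}}, 'hls': {'auto': {'src': '...m3u8'}}}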
+ formats = []
+ for kind, formats_dict in sources.items():
+ if not isinstance(formats_dict, dict):
+ continue
+ for format_id, format_dict in formats_dict.items():
+ if not isinstance(format_dict, dict):
+ continue
+ src = url_or_none(format_dict.get('src'))
+ if not src or not src.startswith('http'):
+ continue
+ if kind == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ src, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=kind, fatal=False))
+ else:
+ height = int_or_none(self._search_regex(
+ r'(\d+)[pP]', format_id, 'height', default=None))
+ fps = int_or_none(self._search_regex(
+ r'(\d+)fps', format_id, 'fps', default=None))
+
+ formats.append({
+ 'url': src,
+ 'format_id': format_id,
+ 'height': height,
+ 'fps': fps,
+ })
+ self._sort_formats(formats)
+
+ json_ld = self._search_json_ld(webpage, display_id, default={})
+
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, default=None))
+ view_count = str_to_int(self._search_regex(
+ r'id=["\']cinemaviews1["\'][^>]*>\s*([0-9,]+)',
+ webpage, 'view count', default=None))
+
+ return merge_dicts(json_ld, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ 'age_limit': 18,
+ })
diff --git a/hypervideo_dl/extractor/eroprofile.py b/hypervideo_dl/extractor/eroprofile.py
new file mode 100644
index 0000000..c460dc7
--- /dev/null
+++ b/hypervideo_dl/extractor/eroprofile.py
@@ -0,0 +1,92 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlencode
+from ..utils import (
+ ExtractorError,
+ merge_dicts,
+)
+
+
+class EroProfileIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
+ _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
+ _NETRC_MACHINE = 'eroprofile'
+ _TESTS = [{
+ 'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
+ 'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
+ 'info_dict': {
+ 'id': '3733775',
+ 'display_id': 'sexy-babe-softcore',
+ 'ext': 'm4v',
+ 'title': 'sexy babe softcore',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ },
+ 'skip': 'Video not found',
+ }, {
+ 'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
+ 'md5': '1baa9602ede46ce904c431f5418d8916',
+ 'info_dict': {
+ 'id': '1133519',
+ 'ext': 'm4v',
+ 'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ },
+ 'skip': 'Requires login',
+ }]
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ query = compat_urllib_parse_urlencode({
+ 'username': username,
+ 'password': password,
+ 'url': 'http://www.eroprofile.com/',
+ })
+ login_url = self._LOGIN_URL + query
+ login_page = self._download_webpage(login_url, None, False)
+
+ m = re.search(r'Your username or password was incorrect\.', login_page)
+ if m:
+ raise ExtractorError(
+ 'Wrong username and/or password.', expected=True)
+
+ self.report_login()
+ redirect_url = self._search_regex(
+ r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
+ self._download_webpage(redirect_url, None, False)
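+ # Login flow as implemented above: credentials travel in the query
+ # string of auth.php; failure is detected via the error message, and
+ # on success the returned page embeds a <script src="..."> URL that
+ # apparently must be fetched once more to complete the session.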
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ m = re.search(r'You must be logged in to view this video\.', webpage)
+ if m:
+ self.raise_login_required('This video requires login')
+
+ video_id = self._search_regex(
+ [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
+ webpage, 'video id', default=None)
+
+ title = self._html_search_regex(
+ (r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
+ webpage, 'title')
+
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'age_limit': 18,
+ })
diff --git a/hypervideo_dl/extractor/escapist.py b/hypervideo_dl/extractor/escapist.py
new file mode 100644
index 0000000..4cd815e
--- /dev/null
+++ b/hypervideo_dl/extractor/escapist.py
@@ -0,0 +1,111 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ clean_html,
+ int_or_none,
+ float_or_none,
+)
+
+
+def _decrypt_config(key, string):
+ a = ''
+ i = ''
+ r = ''
+
+ while len(a) < (len(string) / 2):
+ a += key
+
+ a = a[0:int(len(string) / 2)]
+
+ t = 0
+ while t < len(string):
+ i += chr(int(string[t] + string[t + 1], 16))
+ t += 2
+
+ for t, c in enumerate(a):
+ r += chr(ord(c) ^ ord(i[t]))
+
+ return r
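+
+# _decrypt_config is a plain repeating-key XOR: `string` is hex-decoded
+# and XORed byte-by-byte with `key` repeated to the same length. With
+# made-up values:
+# _decrypt_config('ab', '0b0a') == chr(ord('a') ^ 0x0b) + chr(ord('b') ^ 0x0a)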
+
+
+class EscapistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|v1)\.)?escapistmagazine\.com/videos/view/[^/]+/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
+ 'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
+ 'info_dict': {
+ 'id': '6618',
+ 'ext': 'mp4',
+ 'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
+ 'title': "Breaking Down Baldur's Gate",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 264,
+ 'uploader': 'The Escapist',
+ }
+ }, {
+ 'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer',
+ 'md5': '9e8c437b0dbb0387d3bd3255ca77f6bf',
+ 'info_dict': {
+ 'id': '10044',
+ 'ext': 'mp4',
+ 'description': 'This week, Zero Punctuation reviews Evolve.',
+ 'title': 'Evolve - One vs Multiplayer',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 304,
+ 'uploader': 'The Escapist',
+ }
+ }, {
+ 'url': 'http://escapistmagazine.com/videos/view/the-escapist-presents/6618',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://v1.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ ims_video = self._parse_json(
+ self._search_regex(
+ r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'),
+ video_id)
+ video_id = ims_video['videoID']
+ key = ims_video['hash']
+
+ config = self._download_webpage(
+ 'http://www.escapistmagazine.com/videos/vidconfig.php',
+ video_id, 'Downloading video config', headers={
+ 'Referer': url,
+ }, query={
+ 'videoID': video_id,
+ 'hash': key,
+ })
+
+ data = self._parse_json(_decrypt_config(key, config), video_id)
+
+ video_data = data['videoData']
+
+ title = clean_html(video_data['title'])
+
+ formats = [{
+ 'url': video['src'],
+ 'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']),
+ 'height': int_or_none(video.get('res')),
+ } for video in data['files']['videos']]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage) or data.get('poster'),
+ 'description': self._og_search_description(webpage),
+ 'duration': float_or_none(video_data.get('duration'), 1000),
+ 'uploader': video_data.get('publisher'),
+ 'series': video_data.get('show'),
+ }
diff --git a/hypervideo_dl/extractor/espn.py b/hypervideo_dl/extractor/espn.py
new file mode 100644
index 0000000..6cf05e6
--- /dev/null
+++ b/hypervideo_dl/extractor/espn.py
@@ -0,0 +1,238 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .once import OnceIE
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ unified_timestamp,
+)
+
+
+class ESPNIE(OnceIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:
+ (?:
+ (?:(?:\w+\.)+)?espn\.go|
+ (?:www\.)?espn
+ )\.com/
+ (?:
+ (?:
+ video/(?:clip|iframe/twitter)|
+ watch/player
+ )
+ (?:
+ .*?\?.*?\bid=|
+ /_/id/
+ )|
+ [^/]+/video/
+ )
+ )|
+ (?:www\.)?espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
+ )
+ (?P<id>\d+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://espn.go.com/video/clip?id=10365079',
+ 'info_dict': {
+ 'id': '10365079',
+ 'ext': 'mp4',
+ 'title': '30 for 30 Shorts: Judging Jewell',
+ 'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
+ 'timestamp': 1390936111,
+ 'upload_date': '20140128',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://broadband.espn.go.com/video/clip?id=18910086',
+ 'info_dict': {
+ 'id': '18910086',
+ 'ext': 'mp4',
+ 'title': 'Kyrie spins around defender for two',
+ 'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b',
+ 'timestamp': 1489539155,
+ 'upload_date': '20170315',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/watch/player?id=19141491',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/watch/player/_/id/19141491',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/video/clip?id=10365079',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/video/clip/_/id/17989860',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ clip = self._download_json(
+ 'http://api-app.espn.com/v1/video/clips/%s' % video_id,
+ video_id)['videos'][0]
+
+ title = clip['headline']
+
+ format_urls = set()
+ formats = []
+
+ def traverse_source(source, base_source_id=None):
+ for source_id, child in source.items():
+ if source_id == 'alert':
+ continue
+ elif isinstance(child, compat_str):
+ extract_source(child, base_source_id)
+ elif isinstance(child, dict):
+ traverse_source(
+ child,
+ '%s-%s' % (base_source_id, source_id)
+ if base_source_id else source_id)
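+ # e.g. a (hypothetical) tree {'mobile': {'progressive': {'low': url}}}
+ # ends up calling extract_source(url, 'mobile-progressive'): ids are
+ # joined from the dict keys above a leaf; the leaf key itself is not
+ # included.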
+
+ def extract_source(source_url, source_id=None):
+ if source_url in format_urls:
+ return
+ format_urls.add(source_url)
+ ext = determine_ext(source_url)
+ if OnceIE.suitable(source_url):
+ formats.extend(self._extract_once_formats(source_url))
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ source_url, video_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ source_url, video_id, f4m_id=source_id, fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=source_id, fatal=False))
+ else:
+ f = {
+ 'url': source_url,
+ 'format_id': source_id,
+ }
+ mobj = re.search(r'(\d+)p(\d+)_(\d+)k\.', source_url)
+ if mobj:
+ f.update({
+ 'height': int(mobj.group(1)),
+ 'fps': int(mobj.group(2)),
+ 'tbr': int(mobj.group(3)),
+ })
+ if source_id == 'mezzanine':
+ f['preference'] = 1
+ formats.append(f)
+
+ links = clip.get('links', {})
+ traverse_source(links.get('source', {}))
+ traverse_source(links.get('mobile', {}))
+ self._sort_formats(formats)
+
+ description = clip.get('caption') or clip.get('description')
+ thumbnail = clip.get('thumbnail')
+ duration = int_or_none(clip.get('duration'))
+ timestamp = unified_timestamp(clip.get('originalPublishDate'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class ESPNArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://espn.go.com/nba/recap?gameId=400793786',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://espn.go.com/blog/golden-state-warriors/post/_/id/593/how-warriors-rapidly-regained-a-winning-edge',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://espn.go.com/sports/endurance/story/_/id/12893522/dzhokhar-tsarnaev-sentenced-role-boston-marathon-bombings',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_id = self._search_regex(
+ r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
+ webpage, 'video id', group='id')
+
+ return self.url_result(
+ 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
+
+
+class FiveThirtyEightIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/',
+ 'info_dict': {
+ 'id': '56032156',
+ 'ext': 'flv',
+ 'title': 'FiveThirtyEight: The Raiders can still make the playoffs',
+ 'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ embed_url = self._search_regex(
+ r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)',
+ webpage, 'embed url')
+
+ return self.url_result(embed_url, 'AbcNewsVideo')
diff --git a/hypervideo_dl/extractor/esri.py b/hypervideo_dl/extractor/esri.py
new file mode 100644
index 0000000..e9dcaeb
--- /dev/null
+++ b/hypervideo_dl/extractor/esri.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ int_or_none,
+ parse_filesize,
+ unified_strdate,
+)
+
+
+class EsriVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications',
+ 'md5': 'd4aaf1408b221f1b38227a9bbaeb95bc',
+ 'info_dict': {
+ 'id': '1124',
+ 'ext': 'mp4',
+ 'title': 'ArcGIS Online - Developing Applications',
+ 'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 185,
+ 'upload_date': '20120419',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
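+ # The downloads list is assumed to look like (hypothetical markup):
+ # <li><strong>960x540:</strong>
+ # <a href="/media/1124/video.mp4">MP4&nbsp;(15 MB)</a></li>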
+ formats = []
+ for width, height, content in re.findall(
+ r'(?s)<li><strong>(\d+)x(\d+):</strong>(.+?)</li>', webpage):
+ for video_url, ext, filesize in re.findall(
+ r'<a[^>]+href="([^"]+)">([^<]+)&nbsp;\(([^<]+)\)</a>', content):
+ formats.append({
+ 'url': compat_urlparse.urljoin(url, video_url),
+ 'ext': ext.lower(),
+ 'format_id': '%s-%s' % (ext.lower(), height),
+ 'width': int(width),
+ 'height': int(height),
+ 'filesize_approx': parse_filesize(filesize),
+ })
+ self._sort_formats(formats)
+
+ title = self._html_search_meta('title', webpage, 'title')
+ description = self._html_search_meta(
+ 'description', webpage, 'description', fatal=False)
+
+ thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail', fatal=False)
+ if thumbnail:
+ thumbnail = re.sub(r'_[st]\.jpg$', '_x.jpg', thumbnail)
+
+ duration = int_or_none(self._search_regex(
+ [r'var\s+videoSeconds\s*=\s*(\d+)', r"'duration'\s*:\s*(\d+)"],
+ webpage, 'duration', fatal=False))
+
+ upload_date = unified_strdate(self._html_search_meta(
+ 'last-modified', webpage, 'upload date', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'upload_date': upload_date,
+ 'formats': formats
+ }
diff --git a/hypervideo_dl/extractor/europa.py b/hypervideo_dl/extractor/europa.py
new file mode 100644
index 0000000..2c1c747
--- /dev/null
+++ b/hypervideo_dl/extractor/europa.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ int_or_none,
+ orderedSet,
+ parse_duration,
+ qualities,
+ unified_strdate,
+ xpath_text
+)
+
+
+class EuropaIE(InfoExtractor):
+ _VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)'
+ _TESTS = [{
+ 'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
+ 'md5': '574f080699ddd1e19a675b0ddf010371',
+ 'info_dict': {
+ 'id': 'I107758',
+ 'ext': 'mp4',
+ 'title': 'TRADE - Wikileaks on TTIP',
+ 'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20150811',
+ 'duration': 34,
+ 'view_count': int,
+ 'formats': 'mincount:3',
+ }
+ }, {
+ 'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ playlist = self._download_xml(
+ 'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id, video_id)
+
+ def get_item(type_, preference):
+ items = {}
+ for item in playlist.findall('./info/%s/item' % type_):
+ lang, label = xpath_text(item, 'lg', default=None), xpath_text(item, 'label', default=None)
+ if lang and label:
+ items[lang] = label.strip()
+ for p in preference:
+ if items.get(p):
+ return items[p]
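+ # e.g. with (hypothetical) items == {'en': 'Press briefing',
+ # 'fr': 'Point de presse'} and preference ('fr', 'en', 'int') this
+ # returns 'Point de presse'; with no match it falls through to None.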
+
+ query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ preferred_lang = query.get('sitelang', ('en', ))[0]
+
+ preferred_langs = orderedSet((preferred_lang, 'en', 'int'))
+
+ title = get_item('title', preferred_langs) or video_id
+ description = get_item('description', preferred_langs)
+ thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
+ upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
+ duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
+ view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
+
+ language_preference = qualities(preferred_langs[::-1])
+
+ formats = []
+ for file_ in playlist.findall('./files/file'):
+ video_url = xpath_text(file_, './url')
+ if not video_url:
+ continue
+ lang = xpath_text(file_, './lg')
+ formats.append({
+ 'url': video_url,
+ 'format_id': lang,
+ 'format_note': xpath_text(file_, './lglabel'),
+ 'language_preference': language_preference(lang)
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats
+ }
diff --git a/hypervideo_dl/extractor/expotv.py b/hypervideo_dl/extractor/expotv.py
new file mode 100644
index 0000000..95a8977
--- /dev/null
+++ b/hypervideo_dl/extractor/expotv.py
@@ -0,0 +1,77 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unified_strdate,
+)
+
+
+class ExpoTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
+ _TEST = {
+ 'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916',
+ 'md5': 'fe1d728c3a813ff78f595bc8b7a707a8',
+ 'info_dict': {
+ 'id': '667916',
+ 'ext': 'mp4',
+ 'title': 'NYX Butter Lipstick Little Susie',
+ 'description': 'Goes on like butter, but looks better!',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Stephanie S.',
+ 'upload_date': '20150520',
+ 'view_count': int,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ player_key = self._search_regex(
+ r'<param name="playerKey" value="([^"]+)"', webpage, 'player key')
+ config = self._download_json(
+ 'http://client.expotv.com/video/config/%s/%s' % (video_id, player_key),
+ video_id, 'Downloading video configuration')
+
+ formats = []
+ for fcfg in config['sources']:
+ media_url = fcfg.get('file')
+ if not media_url:
+ continue
+ if fcfg.get('type') == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls'))
+ else:
+ formats.append({
+ 'url': media_url,
+ 'height': int_or_none(fcfg.get('height')),
+ 'format_id': fcfg.get('label'),
+ 'ext': self._search_regex(
+ r'filename=.*\.([a-z0-9_A-Z]+)&', media_url,
+ 'file extension', default=None) or fcfg.get('type'),
+ })
+ self._sort_formats(formats)
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = config.get('image')
+ view_count = int_or_none(self._search_regex(
+ r'<h5>Plays: ([0-9]+)</h5>', webpage, 'view counts'))
+ uploader = self._search_regex(
+ r'<div class="reviewer">\s*<img alt="([^"]+)"', webpage, 'uploader',
+ fatal=False)
+ upload_date = unified_strdate(self._search_regex(
+ r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
+ fatal=False), day_first=False)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': description,
+ 'view_count': view_count,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ }
diff --git a/hypervideo_dl/extractor/expressen.py b/hypervideo_dl/extractor/expressen.py
new file mode 100644
index 0000000..dc8b855
--- /dev/null
+++ b/hypervideo_dl/extractor/expressen.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ unescapeHTML,
+ unified_timestamp,
+)
+
+
+class ExpressenIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?(?:expressen|di)\.se/
+ (?:(?:tvspelare/video|videoplayer/embed)/)?
+ tv/(?:[^/]+/)*
+ (?P<id>[^/?#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
+ 'md5': '2fbbe3ca14392a6b1b36941858d33a45',
+ 'info_dict': {
+ 'id': '8690962',
+ 'ext': 'mp4',
+ 'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
+ 'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 788,
+ 'timestamp': 1526639109,
+ 'upload_date': '20180518',
+ },
+ }, {
+ 'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url') for mobj in re.finditer(
+ r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ def extract_data(name):
+ return self._parse_json(
+ self._search_regex(
+ r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
+ webpage, 'info', group='value'),
+ display_id, transform_source=unescapeHTML)
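+ # The attributes are assumed to look like (hypothetical example):
+ # <div data-video-tracking-info="{&quot;videoId&quot;: &quot;8690962&quot;, ...}">
+ # hence the unescapeHTML before JSON parsing.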
+
+ info = extract_data('video-tracking-info')
+ video_id = info['videoId']
+
+ data = extract_data('article-data')
+ stream = data['stream']
+
+ if determine_ext(stream) == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ stream, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ else:
+ formats = [{
+ 'url': stream,
+ }]
+ self._sort_formats(formats)
+
+ title = info.get('titleRaw') or data['title']
+ description = info.get('descriptionRaw')
+ thumbnail = info.get('socialMediaImage') or data.get('image')
+ duration = int_or_none(info.get('videoTotalSecondsDuration')
+ or data.get('totalSecondsDuration'))
+ timestamp = unified_timestamp(info.get('publishDate'))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/extractors.py b/hypervideo_dl/extractor/extractors.py
new file mode 100644
index 0000000..402e542
--- /dev/null
+++ b/hypervideo_dl/extractor/extractors.py
@@ -0,0 +1,1648 @@
+# flake8: noqa
+from __future__ import unicode_literals
+
+from .abc import (
+ ABCIE,
+ ABCIViewIE,
+)
+from .abcnews import (
+ AbcNewsIE,
+ AbcNewsVideoIE,
+)
+from .abcotvs import (
+ ABCOTVSIE,
+ ABCOTVSClipsIE,
+)
+from .academicearth import AcademicEarthCourseIE
+from .acast import (
+ ACastIE,
+ ACastChannelIE,
+)
+from .adn import ADNIE
+from .adobeconnect import AdobeConnectIE
+from .adobetv import (
+ AdobeTVEmbedIE,
+ AdobeTVIE,
+ AdobeTVShowIE,
+ AdobeTVChannelIE,
+ AdobeTVVideoIE,
+)
+from .adultswim import AdultSwimIE
+from .aenetworks import (
+ AENetworksIE,
+ AENetworksCollectionIE,
+ AENetworksShowIE,
+ HistoryTopicIE,
+ HistoryPlayerIE,
+ BiographyIE,
+)
+from .afreecatv import AfreecaTVIE
+from .airmozilla import AirMozillaIE
+from .aljazeera import AlJazeeraIE
+from .alphaporno import AlphaPornoIE
+from .amara import AmaraIE
+from .amcnetworks import AMCNetworksIE
+from .americastestkitchen import (
+ AmericasTestKitchenIE,
+ AmericasTestKitchenSeasonIE,
+)
+from .animeondemand import AnimeOnDemandIE
+from .anvato import AnvatoIE
+from .aol import AolIE
+from .allocine import AllocineIE
+from .aliexpress import AliExpressLiveIE
+from .apa import APAIE
+from .aparat import AparatIE
+from .appleconnect import AppleConnectIE
+from .appletrailers import (
+ AppleTrailersIE,
+ AppleTrailersSectionIE,
+)
+from .applepodcasts import ApplePodcastsIE
+from .archiveorg import ArchiveOrgIE
+from .arcpublishing import ArcPublishingIE
+from .arkena import ArkenaIE
+from .ard import (
+ ARDBetaMediathekIE,
+ ARDIE,
+ ARDMediathekIE,
+)
+from .arte import (
+ ArteTVIE,
+ ArteTVEmbedIE,
+ ArteTVPlaylistIE,
+)
+from .arnes import ArnesIE
+from .asiancrush import (
+ AsianCrushIE,
+ AsianCrushPlaylistIE,
+)
+from .atresplayer import AtresPlayerIE
+from .atttechchannel import ATTTechChannelIE
+from .atvat import ATVAtIE
+from .audimedia import AudiMediaIE
+from .audioboom import AudioBoomIE
+from .audiomack import AudiomackIE, AudiomackAlbumIE
+from .awaan import (
+ AWAANIE,
+ AWAANVideoIE,
+ AWAANLiveIE,
+ AWAANSeasonIE,
+)
+from .azmedien import AZMedienIE
+from .baidu import BaiduVideoIE
+from .bandaichannel import BandaiChannelIE
+from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
+from .bbc import (
+ BBCCoUkIE,
+ BBCCoUkArticleIE,
+ BBCCoUkIPlayerEpisodesIE,
+ BBCCoUkIPlayerGroupIE,
+ BBCCoUkPlaylistIE,
+ BBCIE,
+)
+from .beeg import BeegIE
+from .behindkink import BehindKinkIE
+from .bellmedia import BellMediaIE
+from .beatport import BeatportIE
+from .bet import BetIE
+from .bfi import BFIPlayerIE
+from .bfmtv import (
+ BFMTVIE,
+ BFMTVLiveIE,
+ BFMTVArticleIE,
+)
+from .bibeltv import BibelTVIE
+from .bigflix import BigflixIE
+from .bild import BildIE
+from .bilibili import (
+ BiliBiliIE,
+ BiliBiliBangumiIE,
+ BilibiliAudioIE,
+ BilibiliAudioAlbumIE,
+ BiliBiliPlayerIE,
+)
+from .biobiochiletv import BioBioChileTVIE
+from .bitchute import (
+ BitChuteIE,
+ BitChuteChannelIE,
+)
+from .biqle import BIQLEIE
+from .bleacherreport import (
+ BleacherReportIE,
+ BleacherReportCMSIE,
+)
+from .bloomberg import BloombergIE
+from .bokecc import BokeCCIE
+from .bongacams import BongaCamsIE
+from .bostonglobe import BostonGlobeIE
+from .box import BoxIE
+from .bpb import BpbIE
+from .br import (
+ BRIE,
+ BRMediathekIE,
+)
+from .bravotv import BravoTVIE
+from .breakcom import BreakIE
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
+from .businessinsider import BusinessInsiderIE
+from .buzzfeed import BuzzFeedIE
+from .byutv import BYUtvIE
+from .c56 import C56IE
+from .camdemy import (
+ CamdemyIE,
+ CamdemyFolderIE
+)
+from .cammodels import CamModelsIE
+from .camtube import CamTubeIE
+from .camwithher import CamWithHerIE
+from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
+from .canvas import (
+ CanvasIE,
+ CanvasEenIE,
+ VrtNUIE,
+ DagelijkseKostIE,
+)
+from .carambatv import (
+ CarambaTVIE,
+ CarambaTVPageIE,
+)
+from .cartoonnetwork import CartoonNetworkIE
+from .cbc import (
+ CBCIE,
+ CBCPlayerIE,
+ CBCWatchVideoIE,
+ CBCWatchIE,
+ CBCOlympicsIE,
+)
+from .cbs import CBSIE
+from .cbslocal import (
+ CBSLocalIE,
+ CBSLocalArticleIE,
+)
+from .cbsinteractive import CBSInteractiveIE
+from .cbsnews import (
+ CBSNewsEmbedIE,
+ CBSNewsIE,
+ CBSNewsLiveVideoIE,
+)
+from .cbssports import (
+ CBSSportsEmbedIE,
+ CBSSportsIE,
+ TwentyFourSevenSportsIE,
+)
+from .ccc import (
+ CCCIE,
+ CCCPlaylistIE,
+)
+from .ccma import CCMAIE
+from .cctv import CCTVIE
+from .cda import CDAIE
+from .ceskatelevize import (
+ CeskaTelevizeIE,
+ CeskaTelevizePoradyIE,
+)
+from .channel9 import Channel9IE
+from .charlierose import CharlieRoseIE
+from .chaturbate import ChaturbateIE
+from .chilloutzone import ChilloutzoneIE
+from .chirbit import (
+ ChirbitIE,
+ ChirbitProfileIE,
+)
+from .cinchcast import CinchcastIE
+from .cinemax import CinemaxIE
+from .ciscolive import (
+ CiscoLiveSessionIE,
+ CiscoLiveSearchIE,
+)
+from .cjsw import CJSWIE
+from .cliphunter import CliphunterIE
+from .clippit import ClippitIE
+from .cliprs import ClipRsIE
+from .clipsyndicate import ClipsyndicateIE
+from .closertotruth import CloserToTruthIE
+from .cloudflarestream import CloudflareStreamIE
+from .cloudy import CloudyIE
+from .clubic import ClubicIE
+from .clyp import ClypIE
+from .cmt import CMTIE
+from .cnbc import (
+ CNBCIE,
+ CNBCVideoIE,
+)
+from .cnn import (
+ CNNIE,
+ CNNBlogsIE,
+ CNNArticleIE,
+)
+from .coub import CoubIE
+from .comedycentral import (
+ ComedyCentralIE,
+ ComedyCentralTVIE,
+)
+from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonprotocols import (
+ MmsIE,
+ RtmpIE,
+)
+from .condenast import CondeNastIE
+from .contv import CONtvIE
+from .corus import CorusIE
+from .cracked import CrackedIE
+from .crackle import CrackleIE
+from .crooksandliars import CrooksAndLiarsIE
+from .crunchyroll import (
+ CrunchyrollIE,
+ CrunchyrollShowPlaylistIE
+)
+from .cspan import CSpanIE
+from .ctsnews import CtsNewsIE
+from .ctv import CTVIE
+from .ctvnews import CTVNewsIE
+from .cultureunplugged import CultureUnpluggedIE
+from .curiositystream import (
+ CuriosityStreamIE,
+ CuriosityStreamCollectionIE,
+)
+from .cwtv import CWTVIE
+from .dailymail import DailyMailIE
+from .dailymotion import (
+ DailymotionIE,
+ DailymotionPlaylistIE,
+ DailymotionUserIE,
+)
+from .daum import (
+ DaumIE,
+ DaumClipIE,
+ DaumPlaylistIE,
+ DaumUserIE,
+)
+from .dbtv import DBTVIE
+from .dctp import DctpTvIE
+from .deezer import DeezerPlaylistIE
+from .democracynow import DemocracynowIE
+from .dfb import DFBIE
+from .dhm import DHMIE
+from .digg import DiggIE
+from .dotsub import DotsubIE
+from .douyutv import (
+ DouyuShowIE,
+ DouyuTVIE,
+)
+from .dplay import (
+ DPlayIE,
+ DiscoveryPlusIE,
+ HGTVDeIE,
+)
+from .dreisat import DreiSatIE
+from .drbonanza import DRBonanzaIE
+from .drtuber import DrTuberIE
+from .drtv import (
+ DRTVIE,
+ DRTVLiveIE,
+)
+from .dtube import DTubeIE
+from .dvtv import DVTVIE
+from .dumpert import DumpertIE
+from .defense import DefenseGouvFrIE
+from .discovery import DiscoveryIE
+from .discoverygo import (
+ DiscoveryGoIE,
+ DiscoveryGoPlaylistIE,
+)
+from .discoverynetworks import DiscoveryNetworksDeIE
+from .discoveryvr import DiscoveryVRIE
+from .disney import DisneyIE
+from .dispeak import DigitallySpeakingIE
+from .dropbox import DropboxIE
+from .dw import (
+ DWIE,
+ DWArticleIE,
+)
+from .eagleplatform import EaglePlatformIE
+from .ebaumsworld import EbaumsWorldIE
+from .echomsk import EchoMskIE
+from .egghead import (
+ EggheadCourseIE,
+ EggheadLessonIE,
+)
+from .ehow import EHowIE
+from .eighttracks import EightTracksIE
+from .einthusan import EinthusanIE
+from .eitb import EitbIE
+from .ellentube import (
+ EllenTubeIE,
+ EllenTubeVideoIE,
+ EllenTubePlaylistIE,
+)
+from .elpais import ElPaisIE
+from .embedly import EmbedlyIE
+from .engadget import EngadgetIE
+from .eporner import EpornerIE
+from .eroprofile import EroProfileIE
+from .escapist import EscapistIE
+from .espn import (
+ ESPNIE,
+ ESPNArticleIE,
+ FiveThirtyEightIE,
+)
+from .esri import EsriVideoIE
+from .europa import EuropaIE
+from .expotv import ExpoTVIE
+from .expressen import ExpressenIE
+from .extremetube import ExtremeTubeIE
+from .eyedotv import EyedoTVIE
+from .facebook import (
+ FacebookIE,
+ FacebookPluginsVideoIE,
+)
+from .faz import FazIE
+from .fc2 import (
+ FC2IE,
+ FC2EmbedIE,
+)
+from .fczenit import FczenitIE
+from .filmon import (
+ FilmOnIE,
+ FilmOnChannelIE,
+)
+from .filmweb import FilmwebIE
+from .firsttv import FirstTVIE
+from .fivemin import FiveMinIE
+from .fivetv import FiveTVIE
+from .flickr import FlickrIE
+from .folketinget import FolketingetIE
+from .footyroom import FootyRoomIE
+from .formula1 import Formula1IE
+from .fourtube import (
+ FourTubeIE,
+ PornTubeIE,
+ PornerBrosIE,
+ FuxIE,
+)
+from .fox import FOXIE
+from .fox9 import (
+ FOX9IE,
+ FOX9NewsIE,
+)
+from .foxgay import FoxgayIE
+from .foxnews import (
+ FoxNewsIE,
+ FoxNewsArticleIE,
+)
+from .foxsports import FoxSportsIE
+from .franceculture import FranceCultureIE
+from .franceinter import FranceInterIE
+from .francetv import (
+ FranceTVIE,
+ FranceTVSiteIE,
+ FranceTVEmbedIE,
+ FranceTVInfoIE,
+ FranceTVInfoSportIE,
+ FranceTVJeunesseIE,
+ GenerationWhatIE,
+ CultureboxIE,
+)
+from .freesound import FreesoundIE
+from .freespeech import FreespeechIE
+from .freshlive import FreshLiveIE
+from .frontendmasters import (
+ FrontendMastersIE,
+ FrontendMastersLessonIE,
+ FrontendMastersCourseIE
+)
+from .fujitv import FujiTVFODPlus7IE
+from .funimation import FunimationIE
+from .funk import FunkIE
+from .fusion import FusionIE
+from .gaia import GaiaIE
+from .gameinformer import GameInformerIE
+from .gamespot import GameSpotIE
+from .gamestar import GameStarIE
+from .gaskrank import GaskrankIE
+from .gazeta import GazetaIE
+from .gdcvault import GDCVaultIE
+from .gedidigital import GediDigitalIE
+from .generic import GenericIE
+from .gfycat import GfycatIE
+from .giantbomb import GiantBombIE
+from .giga import GigaIE
+from .glide import GlideIE
+from .globo import (
+ GloboIE,
+ GloboArticleIE,
+)
+from .go import GoIE
+from .godtube import GodTubeIE
+from .golem import GolemIE
+from .googledrive import GoogleDriveIE
+from .googlepodcasts import (
+ GooglePodcastsIE,
+ GooglePodcastsFeedIE,
+)
+from .googlesearch import GoogleSearchIE
+from .goshgay import GoshgayIE
+from .gputechconf import GPUTechConfIE
+from .groupon import GrouponIE
+from .hbo import HBOIE
+from .hearthisat import HearThisAtIE
+from .heise import HeiseIE
+from .hellporno import HellPornoIE
+from .helsinki import HelsinkiIE
+from .hentaistigma import HentaiStigmaIE
+from .hgtv import HGTVComShowIE
+from .hketv import HKETVIE
+from .hidive import HiDiveIE
+from .historicfilms import HistoricFilmsIE
+from .hitbox import HitboxIE, HitboxLiveIE
+from .hitrecord import HitRecordIE
+from .hornbunny import HornBunnyIE
+from .hotnewhiphop import HotNewHipHopIE
+from .hotstar import (
+ HotStarIE,
+ HotStarPlaylistIE,
+)
+from .howcast import HowcastIE
+from .howstuffworks import HowStuffWorksIE
+from .hrti import (
+ HRTiIE,
+ HRTiPlaylistIE,
+)
+from .huajiao import HuajiaoIE
+from .huffpost import HuffPostIE
+from .hungama import (
+ HungamaIE,
+ HungamaSongIE,
+)
+from .hypem import HypemIE
+from .ign import (
+ IGNIE,
+ IGNVideoIE,
+ IGNArticleIE,
+)
+from .iheart import (
+ IHeartRadioIE,
+ IHeartRadioPodcastIE,
+)
+from .imdb import (
+ ImdbIE,
+ ImdbListIE
+)
+from .imgur import (
+ ImgurIE,
+ ImgurAlbumIE,
+ ImgurGalleryIE,
+)
+from .ina import InaIE
+from .inc import IncIE
+from .indavideo import IndavideoEmbedIE
+from .infoq import InfoQIE
+from .instagram import (
+ InstagramIE,
+ InstagramUserIE,
+ InstagramTagIE,
+)
+from .internazionale import InternazionaleIE
+from .internetvideoarchive import InternetVideoArchiveIE
+from .iprima import IPrimaIE
+from .iqiyi import IqiyiIE
+from .ir90tv import Ir90TvIE
+from .itv import (
+ ITVIE,
+ ITVBTCCIE,
+)
+from .ivi import (
+ IviIE,
+ IviCompilationIE
+)
+from .ivideon import IvideonIE
+from .iwara import IwaraIE
+from .izlesene import IzleseneIE
+from .jamendo import (
+ JamendoIE,
+ JamendoAlbumIE,
+)
+from .jeuxvideo import JeuxVideoIE
+from .jove import JoveIE
+from .joj import JojIE
+from .jwplatform import JWPlatformIE
+from .kakao import KakaoIE
+from .kaltura import KalturaIE
+from .kankan import KankanIE
+from .karaoketv import KaraoketvIE
+from .karrierevideos import KarriereVideosIE
+from .keezmovies import KeezMoviesIE
+from .ketnet import KetnetIE
+from .khanacademy import (
+ KhanAcademyIE,
+ KhanAcademyUnitIE,
+)
+from .kickstarter import KickStarterIE
+from .kinja import KinjaEmbedIE
+from .kinopoisk import KinoPoiskIE
+from .konserthusetplay import KonserthusetPlayIE
+from .krasview import KrasViewIE
+from .ku6 import Ku6IE
+from .kusi import KUSIIE
+from .kuwo import (
+ KuwoIE,
+ KuwoAlbumIE,
+ KuwoChartIE,
+ KuwoSingerIE,
+ KuwoCategoryIE,
+ KuwoMvIE,
+)
+from .la7 import LA7IE
+from .laola1tv import (
+ Laola1TvEmbedIE,
+ Laola1TvIE,
+ EHFTVIE,
+ ITTFIE,
+)
+from .lbry import (
+ LBRYIE,
+ LBRYChannelIE,
+)
+from .lci import LCIIE
+from .lcp import (
+ LcpPlayIE,
+ LcpIE,
+)
+from .lecture2go import Lecture2GoIE
+from .lecturio import (
+ LecturioIE,
+ LecturioCourseIE,
+ LecturioDeCourseIE,
+)
+from .leeco import (
+ LeIE,
+ LePlaylistIE,
+ LetvCloudIE,
+)
+from .lego import LEGOIE
+from .lemonde import LemondeIE
+from .lenta import LentaIE
+from .libraryofcongress import LibraryOfCongressIE
+from .libsyn import LibsynIE
+from .lifenews import (
+ LifeNewsIE,
+ LifeEmbedIE,
+)
+from .limelight import (
+ LimelightMediaIE,
+ LimelightChannelIE,
+ LimelightChannelListIE,
+)
+from .line import (
+ LineTVIE,
+ LineLiveIE,
+ LineLiveChannelIE,
+)
+from .linkedin import (
+ LinkedInLearningIE,
+ LinkedInLearningCourseIE,
+)
+from .linuxacademy import LinuxAcademyIE
+from .litv import LiTVIE
+from .livejournal import LiveJournalIE
+from .liveleak import (
+ LiveLeakIE,
+ LiveLeakEmbedIE,
+)
+from .livestream import (
+ LivestreamIE,
+ LivestreamOriginalIE,
+ LivestreamShortenerIE,
+)
+from .lnkgo import LnkGoIE
+from .localnews8 import LocalNews8IE
+from .lovehomeporn import LoveHomePornIE
+from .lrt import LRTIE
+from .lynda import (
+ LyndaIE,
+ LyndaCourseIE
+)
+from .m6 import M6IE
+from .mailru import (
+ MailRuIE,
+ MailRuMusicIE,
+ MailRuMusicSearchIE,
+)
+from .malltv import MallTVIE
+from .mangomolo import (
+ MangomoloVideoIE,
+ MangomoloLiveIE,
+)
+from .manyvids import ManyVidsIE
+from .maoritv import MaoriTVIE
+from .markiza import (
+ MarkizaIE,
+ MarkizaPageIE,
+)
+from .massengeschmacktv import MassengeschmackTVIE
+from .matchtv import MatchTVIE
+from .mdr import MDRIE
+from .medaltv import MedalTVIE
+from .mediaset import MediasetIE
+from .mediasite import (
+ MediasiteIE,
+ MediasiteCatalogIE,
+ MediasiteNamedCatalogIE,
+)
+from .medici import MediciIE
+from .megaphone import MegaphoneIE
+from .meipai import MeipaiIE
+from .melonvod import MelonVODIE
+from .meta import METAIE
+from .metacafe import MetacafeIE
+from .metacritic import MetacriticIE
+from .mgoon import MgoonIE
+from .mgtv import MGTVIE
+from .miaopai import MiaoPaiIE
+from .microsoftvirtualacademy import (
+ MicrosoftVirtualAcademyIE,
+ MicrosoftVirtualAcademyCourseIE,
+)
+from .minds import (
+ MindsIE,
+ MindsChannelIE,
+ MindsGroupIE,
+)
+from .ministrygrid import MinistryGridIE
+from .minoto import MinotoIE
+from .miomio import MioMioIE
+from .mit import TechTVMITIE, OCWMITIE
+from .mitele import MiTeleIE
+from .mixcloud import (
+ MixcloudIE,
+ MixcloudUserIE,
+ MixcloudPlaylistIE,
+)
+from .mlb import (
+ MLBIE,
+ MLBVideoIE,
+)
+from .mnet import MnetIE
+from .moevideo import MoeVideoIE
+from .mofosex import (
+ MofosexIE,
+ MofosexEmbedIE,
+)
+from .mojvideo import MojvideoIE
+from .morningstar import MorningstarIE
+from .motherless import (
+ MotherlessIE,
+ MotherlessGroupIE
+)
+from .motorsport import MotorsportIE
+from .movieclips import MovieClipsIE
+from .moviezine import MoviezineIE
+from .movingimage import MovingImageIE
+from .msn import MSNIE
+from .mtv import (
+ MTVIE,
+ MTVVideoIE,
+ MTVServicesEmbeddedIE,
+ MTVDEIE,
+ MTVJapanIE,
+)
+from .muenchentv import MuenchenTVIE
+from .mwave import MwaveIE, MwaveMeetGreetIE
+from .mychannels import MyChannelsIE
+from .myspace import MySpaceIE, MySpaceAlbumIE
+from .myspass import MySpassIE
+from .myvi import (
+ MyviIE,
+ MyviEmbedIE,
+)
+from .myvidster import MyVidsterIE
+from .nationalgeographic import (
+ NationalGeographicVideoIE,
+ NationalGeographicTVIE,
+)
+from .naver import NaverIE
+from .nba import (
+ NBAWatchEmbedIE,
+ NBAWatchIE,
+ NBAWatchCollectionIE,
+ NBAEmbedIE,
+ NBAIE,
+ NBAChannelIE,
+)
+from .nbc import (
+ NBCIE,
+ NBCNewsIE,
+ NBCOlympicsIE,
+ NBCOlympicsStreamIE,
+ NBCSportsIE,
+ NBCSportsStreamIE,
+ NBCSportsVPlayerIE,
+)
+from .ndr import (
+ NDRIE,
+ NJoyIE,
+ NDREmbedBaseIE,
+ NDREmbedIE,
+ NJoyEmbedIE,
+)
+from .ndtv import NDTVIE
+from .netzkino import NetzkinoIE
+from .nerdcubed import NerdCubedFeedIE
+from .neteasemusic import (
+ NetEaseMusicIE,
+ NetEaseMusicAlbumIE,
+ NetEaseMusicSingerIE,
+ NetEaseMusicListIE,
+ NetEaseMusicMvIE,
+ NetEaseMusicProgramIE,
+ NetEaseMusicDjRadioIE,
+)
+from .newgrounds import (
+ NewgroundsIE,
+ NewgroundsPlaylistIE,
+)
+from .newstube import NewstubeIE
+from .nextmedia import (
+ NextMediaIE,
+ NextMediaActionNewsIE,
+ AppleDailyIE,
+ NextTVIE,
+)
+from .nexx import (
+ NexxIE,
+ NexxEmbedIE,
+)
+from .nfl import (
+ NFLIE,
+ NFLArticleIE,
+)
+from .nhk import (
+ NhkVodIE,
+ NhkVodProgramIE,
+)
+from .nhl import NHLIE
+from .nick import (
+ NickIE,
+ NickBrIE,
+ NickDeIE,
+ NickNightIE,
+ NickRuIE,
+)
+from .niconico import NiconicoIE, NiconicoPlaylistIE
+from .ninecninemedia import NineCNineMediaIE
+from .ninegag import NineGagIE
+from .ninenow import NineNowIE
+from .nintendo import NintendoIE
+from .njpwworld import NJPWWorldIE
+from .nobelprize import NobelPrizeIE
+from .nonktube import NonkTubeIE
+from .noovo import NoovoIE
+from .normalboots import NormalbootsIE
+from .nosvideo import NosVideoIE
+from .nova import (
+ NovaEmbedIE,
+ NovaIE,
+)
+from .nowness import (
+ NownessIE,
+ NownessPlaylistIE,
+ NownessSeriesIE,
+)
+from .noz import NozIE
+from .npo import (
+ AndereTijdenIE,
+ NPOIE,
+ NPOLiveIE,
+ NPORadioIE,
+ NPORadioFragmentIE,
+ SchoolTVIE,
+ HetKlokhuisIE,
+ VPROIE,
+ WNLIE,
+)
+from .npr import NprIE
+from .nrk import (
+ NRKIE,
+ NRKPlaylistIE,
+ NRKSkoleIE,
+ NRKTVIE,
+ NRKTVDirekteIE,
+ NRKRadioPodkastIE,
+ NRKTVEpisodeIE,
+ NRKTVEpisodesIE,
+ NRKTVSeasonIE,
+ NRKTVSeriesIE,
+)
+from .nrl import NRLTVIE
+from .ntvcojp import NTVCoJpCUIE
+from .ntvde import NTVDeIE
+from .ntvru import NTVRuIE
+from .nytimes import (
+ NYTimesIE,
+ NYTimesArticleIE,
+ NYTimesCookingIE,
+)
+from .nuvid import NuvidIE
+from .nzz import NZZIE
+from .odatv import OdaTVIE
+from .odnoklassniki import OdnoklassnikiIE
+from .oktoberfesttv import OktoberfestTVIE
+from .ondemandkorea import OnDemandKoreaIE
+from .onet import (
+ OnetIE,
+ OnetChannelIE,
+ OnetMVPIE,
+ OnetPlIE,
+)
+from .onionstudios import OnionStudiosIE
+from .ooyala import (
+ OoyalaIE,
+ OoyalaExternalIE,
+)
+from .ora import OraTVIE
+from .orf import (
+ ORFTVthekIE,
+ ORFFM4IE,
+ ORFFM4StoryIE,
+ ORFOE1IE,
+ ORFOE3IE,
+ ORFNOEIE,
+ ORFWIEIE,
+ ORFBGLIE,
+ ORFOOEIE,
+ ORFSTMIE,
+ ORFKTNIE,
+ ORFSBGIE,
+ ORFTIRIE,
+ ORFVBGIE,
+ ORFIPTVIE,
+)
+from .outsidetv import OutsideTVIE
+from .packtpub import (
+ PacktPubIE,
+ PacktPubCourseIE,
+)
+from .palcomp3 import (
+ PalcoMP3IE,
+ PalcoMP3ArtistIE,
+ PalcoMP3VideoIE,
+)
+from .pandoratv import PandoraTVIE
+from .parliamentliveuk import ParliamentLiveUKIE
+from .patreon import PatreonIE
+from .pbs import PBSIE
+from .pearvideo import PearVideoIE
+from .peertube import PeerTubeIE
+from .people import PeopleIE
+from .performgroup import PerformGroupIE
+from .periscope import (
+ PeriscopeIE,
+ PeriscopeUserIE,
+)
+from .philharmoniedeparis import PhilharmonieDeParisIE
+from .phoenix import PhoenixIE
+from .photobucket import PhotobucketIE
+from .picarto import (
+ PicartoIE,
+ PicartoVodIE,
+)
+from .piksel import PikselIE
+from .pinkbike import PinkbikeIE
+from .pinterest import (
+ PinterestIE,
+ PinterestCollectionIE,
+)
+from .pladform import PladformIE
+from .platzi import (
+ PlatziIE,
+ PlatziCourseIE,
+)
+from .playfm import PlayFMIE
+from .playplustv import PlayPlusTVIE
+from .plays import PlaysTVIE
+from .playstuff import PlayStuffIE
+from .playtvak import PlaytvakIE
+from .playvid import PlayvidIE
+from .playwire import PlaywireIE
+from .pluralsight import (
+ PluralsightIE,
+ PluralsightCourseIE,
+)
+from .podomatic import PodomaticIE
+from .pokemon import PokemonIE
+from .polskieradio import (
+ PolskieRadioIE,
+ PolskieRadioCategoryIE,
+)
+from .popcorntimes import PopcorntimesIE
+from .popcorntv import PopcornTVIE
+from .porn91 import Porn91IE
+from .porncom import PornComIE
+from .pornhd import PornHdIE
+from .pornhub import (
+ PornHubIE,
+ PornHubUserIE,
+ PornHubPagedVideoListIE,
+ PornHubUserVideosUploadIE,
+)
+from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
+from .pornoxo import PornoXOIE
+from .puhutv import (
+ PuhuTVIE,
+ PuhuTVSerieIE,
+)
+from .presstv import PressTVIE
+from .prosiebensat1 import ProSiebenSat1IE
+from .puls4 import Puls4IE
+from .pyvideo import PyvideoIE
+from .qqmusic import (
+ QQMusicIE,
+ QQMusicSingerIE,
+ QQMusicAlbumIE,
+ QQMusicToplistIE,
+ QQMusicPlaylistIE,
+)
+from .r7 import (
+ R7IE,
+ R7ArticleIE,
+)
+from .radiocanada import (
+ RadioCanadaIE,
+ RadioCanadaAudioVideoIE,
+)
+from .radiode import RadioDeIE
+from .radiojavan import RadioJavanIE
+from .radiobremen import RadioBremenIE
+from .radiofrance import RadioFranceIE
+from .rai import (
+ RaiPlayIE,
+ RaiPlayLiveIE,
+ RaiPlayPlaylistIE,
+ RaiIE,
+)
+from .raywenderlich import (
+ RayWenderlichIE,
+ RayWenderlichCourseIE,
+)
+from .rbmaradio import RBMARadioIE
+from .rds import RDSIE
+from .redbulltv import (
+ RedBullTVIE,
+ RedBullEmbedIE,
+ RedBullTVRrnContentIE,
+ RedBullIE,
+)
+from .reddit import (
+ RedditIE,
+ RedditRIE,
+)
+from .redtube import RedTubeIE
+from .regiotv import RegioTVIE
+from .rentv import (
+ RENTVIE,
+ RENTVArticleIE,
+)
+from .restudy import RestudyIE
+from .reuters import ReutersIE
+from .reverbnation import ReverbNationIE
+from .rice import RICEIE
+from .rmcdecouverte import RMCDecouverteIE
+from .ro220 import Ro220IE
+from .rockstargames import RockstarGamesIE
+from .roosterteeth import RoosterTeethIE
+from .rottentomatoes import RottenTomatoesIE
+from .roxwel import RoxwelIE
+from .rozhlas import RozhlasIE
+from .rtbf import RTBFIE
+from .rte import RteIE, RteRadioIE
+from .rtlnl import RtlNlIE
+from .rtl2 import (
+ RTL2IE,
+ RTL2YouIE,
+ RTL2YouSeriesIE,
+)
+from .rtp import RTPIE
+from .rts import RTSIE
+from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVETelevisionIE
+from .rtvnh import RTVNHIE
+from .rtvs import RTVSIE
+from .ruhd import RUHDIE
+from .rumble import RumbleEmbedIE
+from .rutube import (
+ RutubeIE,
+ RutubeChannelIE,
+ RutubeEmbedIE,
+ RutubeMovieIE,
+ RutubePersonIE,
+ RutubePlaylistIE,
+)
+from .rutv import RUTVIE
+from .ruutu import RuutuIE
+from .ruv import RuvIE
+from .safari import (
+ SafariIE,
+ SafariApiIE,
+ SafariCourseIE,
+)
+from .samplefocus import SampleFocusIE
+from .sapo import SapoIE
+from .savefrom import SaveFromIE
+from .sbs import SBSIE
+from .screencast import ScreencastIE
+from .screencastomatic import ScreencastOMaticIE
+from .scrippsnetworks import (
+ ScrippsNetworksWatchIE,
+ ScrippsNetworksIE,
+)
+from .scte import (
+ SCTEIE,
+ SCTECourseIE,
+)
+from .seeker import SeekerIE
+from .senateisvp import SenateISVPIE
+from .sendtonews import SendtoNewsIE
+from .servus import ServusIE
+from .sevenplus import SevenPlusIE
+from .sexu import SexuIE
+from .seznamzpravy import (
+ SeznamZpravyIE,
+ SeznamZpravyArticleIE,
+)
+from .shahid import (
+ ShahidIE,
+ ShahidShowIE,
+)
+from .shared import (
+ SharedIE,
+ VivoIE,
+)
+from .showroomlive import ShowRoomLiveIE
+from .simplecast import (
+ SimplecastIE,
+ SimplecastEpisodeIE,
+ SimplecastPodcastIE,
+)
+from .sina import SinaIE
+from .sixplay import SixPlayIE
+from .skyit import (
+ SkyItPlayerIE,
+ SkyItVideoIE,
+ SkyItVideoLiveIE,
+ SkyItIE,
+ SkyItAcademyIE,
+ SkyItArteIE,
+ CieloTVItIE,
+ TV8ItIE,
+)
+from .skylinewebcams import SkylineWebcamsIE
+from .skynewsarabia import (
+ SkyNewsArabiaIE,
+ SkyNewsArabiaArticleIE,
+)
+from .sky import (
+ SkyNewsIE,
+ SkySportsIE,
+ SkySportsNewsIE,
+)
+from .slideshare import SlideshareIE
+from .slideslive import SlidesLiveIE
+from .slutload import SlutloadIE
+from .snotr import SnotrIE
+from .sohu import SohuIE
+from .sonyliv import SonyLIVIE
+from .soundcloud import (
+ SoundcloudEmbedIE,
+ SoundcloudIE,
+ SoundcloudSetIE,
+ SoundcloudUserIE,
+ SoundcloudTrackStationIE,
+ SoundcloudPlaylistIE,
+ SoundcloudSearchIE,
+)
+from .soundgasm import (
+ SoundgasmIE,
+ SoundgasmProfileIE
+)
+from .southpark import (
+ SouthParkIE,
+ SouthParkDeIE,
+ SouthParkDkIE,
+ SouthParkEsIE,
+ SouthParkNlIE
+)
+from .spankbang import (
+ SpankBangIE,
+ SpankBangPlaylistIE,
+)
+from .spankwire import SpankwireIE
+from .spiegel import SpiegelIE
+from .spike import (
+ BellatorIE,
+ ParamountNetworkIE,
+)
+from .stitcher import (
+ StitcherIE,
+ StitcherShowIE,
+)
+from .sport5 import Sport5IE
+from .sportbox import SportBoxIE
+from .sportdeutschland import SportDeutschlandIE
+from .spotify import (
+ SpotifyIE,
+ SpotifyShowIE,
+)
+from .spreaker import (
+ SpreakerIE,
+ SpreakerPageIE,
+ SpreakerShowIE,
+ SpreakerShowPageIE,
+)
+from .springboardplatform import SpringboardPlatformIE
+from .sprout import SproutIE
+from .srgssr import (
+ SRGSSRIE,
+ SRGSSRPlayIE,
+)
+from .srmediathek import SRMediathekIE
+from .stanfordoc import StanfordOpenClassroomIE
+from .steam import SteamIE
+from .storyfire import (
+ StoryFireIE,
+ StoryFireUserIE,
+ StoryFireSeriesIE,
+)
+from .streamable import StreamableIE
+from .streamcloud import StreamcloudIE
+from .streamcz import StreamCZIE
+from .streetvoice import StreetVoiceIE
+from .stretchinternet import StretchInternetIE
+from .stv import STVPlayerIE
+from .sunporno import SunPornoIE
+from .sverigesradio import (
+ SverigesRadioEpisodeIE,
+ SverigesRadioPublicationIE,
+)
+from .svt import (
+ SVTIE,
+ SVTPageIE,
+ SVTPlayIE,
+ SVTSeriesIE,
+)
+from .swrmediathek import SWRMediathekIE
+from .syfy import SyfyIE
+from .sztvhu import SztvHuIE
+from .tagesschau import (
+ TagesschauPlayerIE,
+ TagesschauIE,
+)
+from .tass import TassIE
+from .tbs import TBSIE
+from .tdslifeway import TDSLifewayIE
+from .teachable import (
+ TeachableIE,
+ TeachableCourseIE,
+)
+from .teachertube import (
+ TeacherTubeIE,
+ TeacherTubeUserIE,
+)
+from .teachingchannel import TeachingChannelIE
+from .teamcoco import TeamcocoIE
+from .teamtreehouse import TeamTreeHouseIE
+from .techtalks import TechTalksIE
+from .ted import TEDIE
+from .tele5 import Tele5IE
+from .tele13 import Tele13IE
+from .telebruxelles import TeleBruxellesIE
+from .telecinco import TelecincoIE
+from .telegraaf import TelegraafIE
+from .telemb import TeleMBIE
+from .telequebec import (
+ TeleQuebecIE,
+ TeleQuebecSquatIE,
+ TeleQuebecEmissionIE,
+ TeleQuebecLiveIE,
+ TeleQuebecVideoIE,
+)
+from .teletask import TeleTaskIE
+from .telewebion import TelewebionIE
+from .tennistv import TennisTVIE
+from .tenplay import TenPlayIE
+from .testurl import TestURLIE
+from .tf1 import TF1IE
+from .tfo import TFOIE
+from .theintercept import TheInterceptIE
+from .theplatform import (
+ ThePlatformIE,
+ ThePlatformFeedIE,
+)
+from .thescene import TheSceneIE
+from .thestar import TheStarIE
+from .thesun import TheSunIE
+from .theweatherchannel import TheWeatherChannelIE
+from .thisamericanlife import ThisAmericanLifeIE
+from .thisav import ThisAVIE
+from .thisoldhouse import ThisOldHouseIE
+from .threeqsdn import ThreeQSDNIE
+from .tiktok import (
+ TikTokIE,
+ TikTokUserIE,
+)
+from .tinypic import TinyPicIE
+from .tmz import (
+ TMZIE,
+ TMZArticleIE,
+)
+from .tnaflix import (
+ TNAFlixNetworkEmbedIE,
+ TNAFlixIE,
+ EMPFlixIE,
+ MovieFapIE,
+)
+from .toggle import (
+ ToggleIE,
+ MeWatchIE,
+)
+from .tonline import TOnlineIE
+from .toongoggles import ToonGogglesIE
+from .toutv import TouTvIE
+from .toypics import ToypicsUserIE, ToypicsIE
+from .traileraddict import TrailerAddictIE
+from .trilulilu import TriluliluIE
+from .trovo import (
+ TrovoIE,
+ TrovoVodIE,
+)
+from .trunews import TruNewsIE
+from .trutv import TruTVIE
+from .tube8 import Tube8IE
+from .tubitv import TubiTvIE
+from .tumblr import TumblrIE
+from .tunein import (
+ TuneInClipIE,
+ TuneInStationIE,
+ TuneInProgramIE,
+ TuneInTopicIE,
+ TuneInShortenerIE,
+)
+from .tunepk import TunePkIE
+from .turbo import TurboIE
+from .tv2 import (
+ TV2IE,
+ TV2ArticleIE,
+ KatsomoIE,
+ MTVUutisetArticleIE,
+)
+from .tv2dk import (
+ TV2DKIE,
+ TV2DKBornholmPlayIE,
+)
+from .tv2hu import TV2HuIE
+from .tv4 import TV4IE
+from .tv5mondeplus import TV5MondePlusIE
+from .tv5unis import (
+ TV5UnisVideoIE,
+ TV5UnisIE,
+)
+from .tva import (
+ TVAIE,
+ QubIE,
+)
+from .tvanouvelles import (
+ TVANouvellesIE,
+ TVANouvellesArticleIE,
+)
+from .tvc import (
+ TVCIE,
+ TVCArticleIE,
+)
+from .tver import TVerIE
+from .tvigle import TvigleIE
+from .tvland import TVLandIE
+from .tvn24 import TVN24IE
+from .tvnet import TVNetIE
+from .tvnoe import TVNoeIE
+from .tvnow import (
+ TVNowIE,
+ TVNowNewIE,
+ TVNowSeasonIE,
+ TVNowAnnualIE,
+ TVNowShowIE,
+)
+from .tvp import (
+ TVPEmbedIE,
+ TVPIE,
+ TVPWebsiteIE,
+)
+from .tvplay import (
+ TVPlayIE,
+ ViafreeIE,
+ TVPlayHomeIE,
+)
+from .tvplayer import TVPlayerIE
+from .tweakers import TweakersIE
+from .twentyfourvideo import TwentyFourVideoIE
+from .twentymin import TwentyMinutenIE
+from .twentythreevideo import TwentyThreeVideoIE
+from .twitcasting import TwitCastingIE
+from .twitch import (
+ TwitchVodIE,
+ TwitchCollectionIE,
+ TwitchVideosIE,
+ TwitchVideosClipsIE,
+ TwitchVideosCollectionsIE,
+ TwitchStreamIE,
+ TwitchClipsIE,
+)
+from .twitter import (
+ TwitterCardIE,
+ TwitterIE,
+ TwitterAmplifyIE,
+ TwitterBroadcastIE,
+)
+from .udemy import (
+ UdemyIE,
+ UdemyCourseIE
+)
+from .udn import UDNEmbedIE
+from .ufctv import (
+ UFCTVIE,
+ UFCArabiaIE,
+)
+from .uktvplay import UKTVPlayIE
+from .digiteka import DigitekaIE
+from .dlive import (
+ DLiveVODIE,
+ DLiveStreamIE,
+)
+from .umg import UMGDeIE
+from .unistra import UnistraIE
+from .unity import UnityIE
+from .uol import UOLIE
+from .uplynk import (
+ UplynkIE,
+ UplynkPreplayIE,
+)
+from .urort import UrortIE
+from .urplay import URPlayIE
+from .usanetwork import USANetworkIE
+from .usatoday import USATodayIE
+from .ustream import UstreamIE, UstreamChannelIE
+from .ustudio import (
+ UstudioIE,
+ UstudioEmbedIE,
+)
+from .varzesh3 import Varzesh3IE
+from .vbox7 import Vbox7IE
+from .veehd import VeeHDIE
+from .veoh import VeohIE
+from .vesti import VestiIE
+from .vevo import (
+ VevoIE,
+ VevoPlaylistIE,
+)
+from .vgtv import (
+ BTArticleIE,
+ BTVestlendingenIE,
+ VGTVIE,
+)
+from .vh1 import VH1IE
+from .vice import (
+ ViceIE,
+ ViceArticleIE,
+ ViceShowIE,
+)
+from .vidbit import VidbitIE
+from .viddler import ViddlerIE
+from .videa import VideaIE
+from .videodetective import VideoDetectiveIE
+from .videofyme import VideofyMeIE
+from .videomore import (
+ VideomoreIE,
+ VideomoreVideoIE,
+ VideomoreSeasonIE,
+)
+from .videopress import VideoPressIE
+from .vidio import VidioIE
+from .vidlii import VidLiiIE
+from .vidme import (
+ VidmeIE,
+ VidmeUserIE,
+ VidmeUserLikesIE,
+)
+from .vier import VierIE, VierVideosIE
+from .viewlift import (
+ ViewLiftIE,
+ ViewLiftEmbedIE,
+)
+from .viidea import ViideaIE
+from .vimeo import (
+ VimeoIE,
+ VimeoAlbumIE,
+ VimeoChannelIE,
+ VimeoGroupsIE,
+ VimeoLikesIE,
+ VimeoOndemandIE,
+ VimeoReviewIE,
+ VimeoUserIE,
+ VimeoWatchLaterIE,
+ VHXEmbedIE,
+)
+from .vimple import VimpleIE
+from .vine import (
+ VineIE,
+ VineUserIE,
+)
+from .viki import (
+ VikiIE,
+ VikiChannelIE,
+)
+from .viqeo import ViqeoIE
+from .viu import (
+ ViuIE,
+ ViuPlaylistIE,
+ ViuOTTIE,
+)
+from .vk import (
+ VKIE,
+ VKUserVideosIE,
+ VKWallPostIE,
+)
+from .vlive import (
+ VLiveIE,
+ VLivePostIE,
+ VLiveChannelIE,
+)
+from .vodlocker import VodlockerIE
+from .vodpl import VODPlIE
+from .vodplatform import VODPlatformIE
+from .voicerepublic import VoiceRepublicIE
+from .voot import VootIE
+from .voxmedia import (
+ VoxMediaVolumeIE,
+ VoxMediaIE,
+)
+from .vrt import VRTIE
+from .vrak import VrakIE
+from .vrv import (
+ VRVIE,
+ VRVSeriesIE,
+)
+from .vshare import VShareIE
+from .vtm import VTMIE
+from .medialaan import MedialaanIE
+from .vube import VubeIE
+from .vuclip import VuClipIE
+from .vvvvid import (
+ VVVVIDIE,
+ VVVVIDShowIE,
+)
+from .vyborymos import VyboryMosIE
+from .vzaar import VzaarIE
+from .wakanim import WakanimIE
+from .walla import WallaIE
+from .washingtonpost import (
+ WashingtonPostIE,
+ WashingtonPostArticleIE,
+)
+from .wat import WatIE
+from .watchbox import WatchBoxIE
+from .watchindianporn import WatchIndianPornIE
+from .wdr import (
+ WDRIE,
+ WDRPageIE,
+ WDRElefantIE,
+ WDRMobileIE,
+)
+from .webcaster import (
+ WebcasterIE,
+ WebcasterFeedIE,
+)
+from .webofstories import (
+ WebOfStoriesIE,
+ WebOfStoriesPlaylistIE,
+)
+from .weibo import (
+ WeiboIE,
+ WeiboMobileIE
+)
+from .weiqitv import WeiqiTVIE
+from .wistia import (
+ WistiaIE,
+ WistiaPlaylistIE,
+)
+from .worldstarhiphop import WorldStarHipHopIE
+from .wsj import (
+ WSJIE,
+ WSJArticleIE,
+)
+from .wwe import WWEIE
+from .xbef import XBefIE
+from .xboxclips import XboxClipsIE
+from .xfileshare import XFileShareIE
+from .xhamster import (
+ XHamsterIE,
+ XHamsterEmbedIE,
+ XHamsterUserIE,
+)
+from .xiami import (
+ XiamiSongIE,
+ XiamiAlbumIE,
+ XiamiArtistIE,
+ XiamiCollectionIE
+)
+from .ximalaya import (
+ XimalayaIE,
+ XimalayaAlbumIE
+)
+from .xminus import XMinusIE
+from .xnxx import XNXXIE
+from .xstream import XstreamIE
+from .xtube import XTubeUserIE, XTubeIE
+from .xuite import XuiteIE
+from .xvideos import XVideosIE
+from .xxxymovies import XXXYMoviesIE
+from .yahoo import (
+ YahooIE,
+ YahooSearchIE,
+ YahooGyaOPlayerIE,
+ YahooGyaOIE,
+ YahooJapanNewsIE,
+)
+from .yandexdisk import YandexDiskIE
+from .yandexmusic import (
+ YandexMusicTrackIE,
+ YandexMusicAlbumIE,
+ YandexMusicPlaylistIE,
+ YandexMusicArtistTracksIE,
+ YandexMusicArtistAlbumsIE,
+)
+from .yandexvideo import YandexVideoIE
+from .yapfiles import YapFilesIE
+from .yesjapan import YesJapanIE
+from .yinyuetai import YinYueTaiIE
+from .ynet import YnetIE
+from .youjizz import YouJizzIE
+from .youku import (
+ YoukuIE,
+ YoukuShowIE,
+)
+from .younow import (
+ YouNowLiveIE,
+ YouNowChannelIE,
+ YouNowMomentIE,
+)
+from .youporn import YouPornIE
+from .yourporn import YourPornIE
+from .yourupload import YourUploadIE
+from .youtube import (
+ YoutubeIE,
+ YoutubeFavouritesIE,
+ YoutubeHistoryIE,
+ YoutubeTabIE,
+ YoutubePlaylistIE,
+ YoutubeRecommendedIE,
+ YoutubeSearchDateIE,
+ YoutubeSearchIE,
+ # YoutubeSearchURLIE,
+ YoutubeSubscriptionsIE,
+ YoutubeTruncatedIDIE,
+ YoutubeTruncatedURLIE,
+ YoutubeYtBeIE,
+ YoutubeYtUserIE,
+ YoutubeWatchLaterIE,
+)
+from .zapiks import ZapiksIE
+from .zattoo import (
+ BBVTVIE,
+ EinsUndEinsTVIE,
+ EWETVIE,
+ GlattvisionTVIE,
+ MNetTVIE,
+ MyVisionTVIE,
+ NetPlusIE,
+ OsnatelTVIE,
+ QuantumTVIE,
+ QuicklineIE,
+ QuicklineLiveIE,
+ SaltTVIE,
+ SAKTVIE,
+ VTXTVIE,
+ WalyTVIE,
+ ZattooIE,
+ ZattooLiveIE,
+)
+from .zdf import ZDFIE, ZDFChannelIE
+from .zhihu import ZhihuIE
+from .zingmp3 import (
+ ZingMp3IE,
+ ZingMp3AlbumIE,
+)
+from .zoom import ZoomIE
+from .zype import ZypeIE
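
Everything imported above ends up in one ordered registry that URL resolution walks front to back, so the first matching extractor wins and a generic fallback can sit at the end. A minimal sketch of that lookup, assuming the youtube-dl-style suitable() classmethod each IE class inherits (resolve_extractor is a hypothetical name, not part of this patch):

    # Hypothetical sketch: return the first extractor class whose
    # _VALID_URL pattern accepts the URL (suitable() wraps that match).
    def resolve_extractor(url, extractor_classes):
        for ie in extractor_classes:
            if ie.suitable(url):
                return ie
        return None
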
diff --git a/hypervideo_dl/extractor/extremetube.py b/hypervideo_dl/extractor/extremetube.py
new file mode 100644
index 0000000..acd4090
--- /dev/null
+++ b/hypervideo_dl/extractor/extremetube.py
@@ -0,0 +1,50 @@
+from __future__ import unicode_literals
+
+from ..utils import str_to_int
+from .keezmovies import KeezMoviesIE
+
+
+class ExtremeTubeIE(KeezMoviesIE):
+ _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
+ _TESTS = [{
+ 'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
+ 'md5': '92feaafa4b58e82f261e5419f39c60cb',
+ 'info_dict': {
+ 'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
+ 'ext': 'mp4',
+ 'title': 'Music Video 14 british euro brit european cumshots swallow',
+ 'uploader': 'anonim',
+ 'view_count': int,
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'http://www.extremetube.com/gay/video/abcde-1234',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.extremetube.com/video/latina-slut-fucked-by-fat-black-dick',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.extremetube.com/video/652431',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ webpage, info = self._extract_info(url)
+
+ if not info['title']:
+ info['title'] = self._search_regex(
+ r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title')
+
+ uploader = self._html_search_regex(
+ r'Uploaded by:\s*</[^>]+>\s*<a[^>]+>(.+?)</a>',
+ webpage, 'uploader', fatal=False)
+ view_count = str_to_int(self._search_regex(
+ r'Views:\s*</[^>]+>\s*<[^>]+>([\d,\.]+)</',
+ webpage, 'view count', fatal=False))
+
+ info.update({
+ 'uploader': uploader,
+ 'view_count': view_count,
+ })
+
+ return info
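
A note on the view-count parsing above: str_to_int from ..utils strips the digit grouping matched by ([\d,\.]+) before converting. Roughly, as a sketch (the vendored helper behaves similarly, including tolerating None):

    import re

    def str_to_int(s):
        # sketch of hypervideo_dl.utils.str_to_int: drop grouping
        # characters, then convert to int
        if s is None:
            return None
        return int(re.sub(r'[,\.\+]', '', s))

    str_to_int('1,234,567')  # -> 1234567
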
diff --git a/hypervideo_dl/extractor/eyedotv.py b/hypervideo_dl/extractor/eyedotv.py
new file mode 100644
index 0000000..f62ddeb
--- /dev/null
+++ b/hypervideo_dl/extractor/eyedotv.py
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ parse_duration,
+ ExtractorError,
+)
+
+
+class EyedoTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301',
+ 'md5': 'ba14f17995cdfc20c36ba40e21bf73f7',
+ 'info_dict': {
+ 'id': '16301',
+ 'ext': 'mp4',
+ 'title': 'Journée du conseil scientifique de l\'Afnic 2015',
+ 'description': 'md5:4abe07293b2f73efc6e1c37028d58c98',
+ 'uploader': 'Afnic Live',
+ 'uploader_id': '8023',
+ }
+ }
+ _ROOT_URL = 'http://live.eyedo.net:1935/'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id)
+
+ def _add_ns(path):
+ return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api')
+
+ title = xpath_text(video_data, _add_ns('Titre'), 'title', True)
+ state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'state live code', True)
+ if state_live_code == 'avenir':
+ raise ExtractorError(
+ '%s said: We\'re sorry, but this video is not yet available.' % self.IE_NAME,
+ expected=True)
+
+ is_live = state_live_code == 'live'
+ m3u8_url = None
+ # http://eyedo.tv/Content/Html5/Scripts/html5view.js
+ if is_live:
+ if xpath_text(video_data, 'Cdn') == 'true':
+ m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id
+ else:
+ m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id
+ else:
+ m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native'),
+ 'description': xpath_text(video_data, _add_ns('Description')),
+ 'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
+ 'uploader': xpath_text(video_data, _add_ns('Createur')),
+ 'uploader_id': xpath_text(video_data, _add_ns('CreateurId')),
+ 'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')),
+ 'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')),
+ }
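
The _add_ns wrapper is needed because the GetLive API returns namespaced XML, and ElementTree only matches tags written in Clark notation. InfoExtractor._xpath_ns roughly performs the expansion below (a sketch modeled on the youtube-dl helper of the same name):

    def xpath_ns(path, ns):
        # wrap every path component in '{namespace}tag' Clark notation,
        # leaving '.' and empty components alone
        return '/'.join(
            c if c in ('', '.') else '{%s}%s' % (ns, c)
            for c in path.split('/'))

    xpath_ns('Titre', 'urn:example')  # -> '{urn:example}Titre'
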
diff --git a/hypervideo_dl/extractor/facebook.py b/hypervideo_dl/extractor/facebook.py
new file mode 100644
index 0000000..04650af
--- /dev/null
+++ b/hypervideo_dl/extractor/facebook.py
@@ -0,0 +1,709 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+import socket
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_etree_fromstring,
+ compat_http_client,
+ compat_str,
+ compat_urllib_error,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_unquote_plus,
+)
+from ..utils import (
+ clean_html,
+ error_to_compat_str,
+ ExtractorError,
+ float_or_none,
+ get_element_by_id,
+ int_or_none,
+ js_to_json,
+ limit_length,
+ parse_count,
+ qualities,
+ sanitized_Request,
+ try_get,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class FacebookIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://
+ (?:[\w-]+\.)?(?:facebook\.com|facebookcorewwwi\.onion)/
+ (?:[^#]*?\#!/)?
+ (?:
+ (?:
+ video/video\.php|
+ photo\.php|
+ video\.php|
+ video/embed|
+ story\.php|
+ watch(?:/live)?/?
+ )\?(?:.*?)(?:v|video_id|story_fbid)=|
+ [^/]+/videos/(?:[^/]+/)?|
+ [^/]+/posts/|
+ groups/[^/]+/permalink/|
+ watchparty/
+ )|
+ facebook:
+ )
+ (?P<id>[0-9]+)
+ '''
+ _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
+ _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
+ _NETRC_MACHINE = 'facebook'
+ IE_NAME = 'facebook'
+
+ _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
+ _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
+
+ _TESTS = [{
+ 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
+ 'md5': '6a40d33c0eccbb1af76cf0485a052659',
+ 'info_dict': {
+ 'id': '637842556329505',
+ 'ext': 'mp4',
+ 'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
+ 'uploader': 'Tennis on Facebook',
+ 'upload_date': '20140908',
+ 'timestamp': 1410199200,
+ },
+ 'skip': 'Requires logging in',
+ }, {
+ # data.video
+ 'url': 'https://www.facebook.com/video.php?v=274175099429670',
+ 'info_dict': {
+ 'id': '274175099429670',
+ 'ext': 'mp4',
+ 'title': 're:^Asif Nawab Butt posted a video',
+ 'uploader': 'Asif Nawab Butt',
+ 'upload_date': '20140506',
+ 'timestamp': 1399398998,
+ 'thumbnail': r're:^https?://.*',
+ },
+ 'expected_warnings': [
+ 'title'
+ ]
+ }, {
+ 'note': 'Video with DASH manifest',
+ 'url': 'https://www.facebook.com/video.php?v=957955867617029',
+ 'md5': 'b2c28d528273b323abe5c6ab59f0f030',
+ 'info_dict': {
+ 'id': '957955867617029',
+ 'ext': 'mp4',
+ 'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
+ 'uploader': 'Demy de Zeeuw',
+ 'upload_date': '20160110',
+ 'timestamp': 1452431627,
+ },
+ 'skip': 'Requires logging in',
+ }, {
+ 'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
+ 'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
+ 'info_dict': {
+ 'id': '544765982287235',
+ 'ext': 'mp4',
+ 'title': '"What are you doing running in the snow?"',
+ 'uploader': 'FailArmy',
+ },
+ 'skip': 'Video gone',
+ }, {
+ 'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
+ 'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
+ 'info_dict': {
+ 'id': '1035862816472149',
+ 'ext': 'mp4',
+ 'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog',
+ 'uploader': 'S. Saint',
+ },
+ 'skip': 'Video gone',
+ }, {
+ 'note': 'swf params escaped',
+ 'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
+ 'md5': '97ba073838964d12c70566e0085c2b91',
+ 'info_dict': {
+ 'id': '10153664894881749',
+ 'ext': 'mp4',
+ 'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...',
+ 'thumbnail': r're:^https?://.*',
+ 'timestamp': 1456259628,
+ 'upload_date': '20160223',
+ 'uploader': 'Barack Obama',
+ },
+ }, {
+ # has 1080P, but only up to 720p in swf params
+ # data.video.story.attachments[].media
+ 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
+ 'md5': '9571fae53d4165bbbadb17a94651dcdc',
+ 'info_dict': {
+ 'id': '10155529876156509',
+ 'ext': 'mp4',
+ 'title': 'She survived the holocaust — and years later, she’s getting her citizenship s...',
+ 'timestamp': 1477818095,
+ 'upload_date': '20161030',
+ 'uploader': 'CNN',
+ 'thumbnail': r're:^https?://.*',
+ 'view_count': int,
+ },
+ }, {
+ # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
+ # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
+ 'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
+ 'info_dict': {
+ 'id': '1417995061575415',
+ 'ext': 'mp4',
+ 'title': 'md5:1db063d6a8c13faa8da727817339c857',
+ 'timestamp': 1486648217,
+ 'upload_date': '20170209',
+ 'uploader': 'Yaroslav Korpan',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
+ 'info_dict': {
+ 'id': '1072691702860471',
+ 'ext': 'mp4',
+ 'title': 'md5:ae2d22a93fbb12dad20dc393a869739d',
+ 'timestamp': 1477305000,
+ 'upload_date': '20161024',
+ 'uploader': 'La Guía Del Varón',
+ 'thumbnail': r're:^https?://.*',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
+ 'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
+ 'info_dict': {
+ 'id': '1396382447100162',
+ 'ext': 'mp4',
+ 'title': 'md5:19a428bbde91364e3de815383b54a235',
+ 'timestamp': 1486035494,
+ 'upload_date': '20170202',
+ 'uploader': 'Elisabeth Ahtn',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.facebook.com/video.php?v=10204634152394104',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
+ 'only_matching': True,
+ }, {
+ # data.mediaset.currMedia.edges
+ 'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
+ 'only_matching': True,
+ }, {
+ # data.video.story.attachments[].media
+ 'url': 'facebook:544765982287235',
+ 'only_matching': True,
+ }, {
+ # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
+ 'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
+ 'only_matching': True,
+ }, {
+ # data.video.creation_story.attachments[].media
+ 'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
+ 'only_matching': True,
+ }, {
+ # data.video
+ 'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
+ 'only_matching': True,
+ }, {
+ # no title
+ 'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
+ 'only_matching': True,
+ }, {
+ # data.video
+ 'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
+ 'info_dict': {
+ 'id': '359649331226507',
+ 'ext': 'mp4',
+ 'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
+ 'uploader': 'ESL One Dota 2',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
+ 'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
+ 'info_dict': {
+ 'id': '106560053808006',
+ },
+ 'playlist_count': 2,
+ }, {
+ # data.video.story.attachments[].media
+ 'url': 'https://www.facebook.com/watch/?v=647537299265662',
+ 'only_matching': True,
+ }, {
+ # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
+ 'url': 'https://www.facebook.com/PankajShahLondon/posts/10157667649866271',
+ 'info_dict': {
+ 'id': '10157667649866271',
+ },
+ 'playlist_count': 3,
+ }, {
+ # data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media
+ 'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330',
+ 'info_dict': {
+ 'id': '117576630041613',
+ 'ext': 'mp4',
+ # TODO: title can be extracted from video page
+ 'title': 'Facebook video #117576630041613',
+ 'uploader_id': '189393014416438',
+ 'upload_date': '20201123',
+ 'timestamp': 1606162592,
+ },
+ 'skip': 'Requires logging in',
+ }, {
+ # node.comet_sections.content.story.attached_story.attachments.style_type_renderer.attachment.media
+ 'url': 'https://www.facebook.com/groups/ateistiskselskab/permalink/10154930137678856/',
+ 'info_dict': {
+ 'id': '211567722618337',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #211567722618337',
+ 'uploader_id': '127875227654254',
+ 'upload_date': '20161122',
+ 'timestamp': 1479793574,
+ },
+ }, {
+ # data.video.creation_story.attachments[].media
+ 'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/watchparty/211641140192478',
+ 'info_dict': {
+ 'id': '211641140192478',
+ },
+ 'playlist_count': 1,
+ 'skip': 'Requires logging in',
+ }]
+ _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
+ _api_config = {
+ 'graphURI': '/api/graphql/'
+ }
+
+ @staticmethod
+ def _extract_urls(webpage):
+ urls = []
+ for mobj in re.finditer(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
+ webpage):
+ urls.append(mobj.group('url'))
+ # Facebook API embed
+ # see https://developers.facebook.com/docs/plugins/embedded-video-player
+ for mobj in re.finditer(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
+ data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage):
+ urls.append(mobj.group('url'))
+ return urls
+
+ def _login(self):
+ useremail, password = self._get_login_info()
+ if useremail is None:
+ return
+
+ login_page_req = sanitized_Request(self._LOGIN_URL)
+ self._set_cookie('facebook.com', 'locale', 'en_US')
+ login_page = self._download_webpage(login_page_req, None,
+ note='Downloading login page',
+ errnote='Unable to download login page')
+ lsd = self._search_regex(
+ r'<input type="hidden" name="lsd" value="([^"]*)"',
+ login_page, 'lsd')
+ lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')
+
+ login_form = {
+ 'email': useremail,
+ 'pass': password,
+ 'lsd': lsd,
+ 'lgnrnd': lgnrnd,
+ 'next': 'http://facebook.com/home.php',
+ 'default_persistent': '0',
+ 'legacy_return': '1',
+ 'timezone': '-60',
+ 'trynum': '1',
+ }
+ request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
+ request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ try:
+ login_results = self._download_webpage(request, None,
+ note='Logging in', errnote='unable to fetch login page')
+ if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
+ error = self._html_search_regex(
+ r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>',
+ login_results, 'login error', default=None, group='error')
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ self._downloader.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
+ return
+
+ fb_dtsg = self._search_regex(
+ r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None)
+ h = self._search_regex(
+ r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None)
+
+ if not fb_dtsg or not h:
+ return
+
+ check_form = {
+ 'fb_dtsg': fb_dtsg,
+ 'h': h,
+ 'name_action_selected': 'dont_save',
+ }
+ check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
+ check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ check_response = self._download_webpage(check_req, None,
+ note='Confirming login')
+ if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
+ self._downloader.report_warning('Unable to confirm login, you have to log in with your browser and authorize the login.')
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err))
+ return
+
+ def _real_initialize(self):
+ self._login()
+
+ def _extract_from_url(self, url, video_id):
+ webpage = self._download_webpage(
+ url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
+
+ video_data = None
+
+ def extract_video_data(instances):
+ video_data = []
+ for item in instances:
+ if try_get(item, lambda x: x[1][0]) == 'VideoConfig':
+ video_item = item[2][0]
+ if video_item.get('video_id'):
+ video_data.append(video_item['videoData'])
+ return video_data
+
+ server_js_data = self._parse_json(self._search_regex(
+ [r'handleServerJS\(({.+})(?:\);|,")', r'\bs\.handle\(({.+?})\);'],
+ webpage, 'server js data', default='{}'), video_id, fatal=False)
+
+ if server_js_data:
+ video_data = extract_video_data(server_js_data.get('instances', []))
+
+ def extract_from_jsmods_instances(js_data):
+ if js_data:
+ return extract_video_data(try_get(
+ js_data, lambda x: x['jsmods']['instances'], list) or [])
+
+ def extract_dash_manifest(video, formats):
+ dash_manifest = video.get('dash_manifest')
+ if dash_manifest:
+ formats.extend(self._parse_mpd_formats(
+ compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
+
+ def process_formats(formats):
+ # Downloads with a browser User-Agent are rate limited. Work around
+ # this by sending a non-browser User-Agent.
+ for f in formats:
+ f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
+
+ self._sort_formats(formats)
+
+ def extract_relay_data(_filter):
+ return self._parse_json(self._search_regex(
+ r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
+ webpage, 'relay data', default='{}'), video_id, fatal=False) or {}
+
+ def extract_relay_prefetched_data(_filter):
+ relay_data = extract_relay_data(_filter)
+ for require in (relay_data.get('require') or []):
+ if require[0] == 'RelayPrefetchedStreamCache':
+ return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
+
+ if not video_data:
+ server_js_data = self._parse_json(self._search_regex([
+ r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
+ r'bigPipe\.onPageletArrive\(({.*?id\s*:\s*"%s".*?})\);' % self._SUPPORTED_PAGLETS_REGEX
+ ], webpage, 'js data', default='{}'), video_id, js_to_json, False)
+ video_data = extract_from_jsmods_instances(server_js_data)
+
+ if not video_data:
+ data = extract_relay_prefetched_data(
+ r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
+ if data:
+ entries = []
+
+ def parse_graphql_video(video):
+ formats = []
+ q = qualities(['sd', 'hd'])
+ for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
+ playable_url = video.get('playable_url' + suffix)
+ if not playable_url:
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'quality': q(format_id),
+ 'url': playable_url,
+ })
+ extract_dash_manifest(video, formats)
+ process_formats(formats)
+ v_id = video.get('videoId') or video.get('id') or video_id
+ info = {
+ 'id': v_id,
+ 'formats': formats,
+ 'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
+ 'uploader_id': try_get(video, lambda x: x['owner']['id']),
+ 'timestamp': int_or_none(video.get('publish_time')),
+ 'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
+ }
+ description = try_get(video, lambda x: x['savable_description']['text'])
+ title = video.get('name')
+ if title:
+ info.update({
+ 'title': title,
+ 'description': description,
+ })
+ else:
+ info['title'] = description or 'Facebook video #%s' % v_id
+ entries.append(info)
+
+ def parse_attachment(attachment, key='media'):
+ media = attachment.get(key) or {}
+ if media.get('__typename') == 'Video':
+ return parse_graphql_video(media)
+
+ nodes = data.get('nodes') or []
+ node = data.get('node') or {}
+ if not nodes and node:
+ nodes.append(node)
+ for node in nodes:
+ story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
+ attachments = try_get(story, [
+ lambda x: x['attached_story']['attachments'],
+ lambda x: x['attachments']
+ ], list) or []
+ for attachment in attachments:
+ attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
+ ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
+ for n in ns:
+ parse_attachment(n)
+ parse_attachment(attachment)
+
+ edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
+ for edge in edges:
+ parse_attachment(edge, key='node')
+
+ video = data.get('video') or {}
+ if video:
+ attachments = try_get(video, [
+ lambda x: x['story']['attachments'],
+ lambda x: x['creation_story']['attachments']
+ ], list) or []
+ for attachment in attachments:
+ parse_attachment(attachment)
+ if not entries:
+ parse_graphql_video(video)
+
+ return self.playlist_result(entries, video_id)
+
+ if not video_data:
+ m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
+ if m_msg is not None:
+ raise ExtractorError(
+ 'The video is not available, Facebook said: "%s"' % m_msg.group(1),
+ expected=True)
+ elif any(p in webpage for p in (
+ '>You must log in to continue',
+ 'id="login_form"',
+ 'id="loginbutton"')):
+ self.raise_login_required()
+
+ if not video_data and '/watchparty/' in url:
+ post_data = {
+ 'doc_id': 3731964053542869,
+ 'variables': json.dumps({
+ 'livingRoomID': video_id,
+ }),
+ }
+
+ prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
+ if prefetched_data:
+ lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
+ if lsd:
+ post_data[lsd['name']] = lsd['value']
+
+ relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
+ for define in (relay_data.get('define') or []):
+ if define[0] == 'RelayAPIConfigDefaults':
+ self._api_config = define[2]
+
+ living_room = self._download_json(
+ urljoin(url, self._api_config['graphURI']), video_id,
+ data=urlencode_postdata(post_data))['data']['living_room']
+
+ entries = []
+ for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
+ video = try_get(edge, lambda x: x['node']['video']) or {}
+ v_id = video.get('id')
+ if not v_id:
+ continue
+ v_id = compat_str(v_id)
+ entries.append(self.url_result(
+ self._VIDEO_PAGE_TEMPLATE % v_id,
+ self.ie_key(), v_id, video.get('name')))
+
+ return self.playlist_result(entries, video_id)
+
+ if not video_data:
+ # Video info not in first request, do a secondary request using
+ # tahoe player specific URL
+ tahoe_data = self._download_webpage(
+ self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
+ data=urlencode_postdata({
+ '__a': 1,
+ '__pc': self._search_regex(
+ r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
+ 'pkg cohort', default='PHASED:DEFAULT'),
+ '__rev': self._search_regex(
+ r'client_revision["\']\s*:\s*(\d+),', webpage,
+ 'client revision', default='3944515'),
+ 'fb_dtsg': self._search_regex(
+ r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
+ webpage, 'dtsg token', default=''),
+ }),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ tahoe_js_data = self._parse_json(
+ self._search_regex(
+ r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
+ 'tahoe js data', default='{}'),
+ video_id, fatal=False)
+ video_data = extract_from_jsmods_instances(tahoe_js_data)
+
+ if not video_data:
+ raise ExtractorError('Cannot parse data')
+
+ if len(video_data) > 1:
+ entries = []
+ for v in video_data:
+ video_url = v[0].get('video_url')
+ if not video_url:
+ continue
+ entries.append(self.url_result(urljoin(
+ url, video_url), self.ie_key(), v[0].get('video_id')))
+ return self.playlist_result(entries, video_id)
+ video_data = video_data[0]
+
+ formats = []
+ subtitles = {}
+ for f in video_data:
+ format_id = f['stream_type']
+ if f and isinstance(f, dict):
+ f = [f]
+ if not f or not isinstance(f, list):
+ continue
+ for quality in ('sd', 'hd'):
+ for src_type in ('src', 'src_no_ratelimit'):
+ src = f[0].get('%s_%s' % (quality, src_type))
+ if src:
+ preference = -10 if format_id == 'progressive' else 0
+ if quality == 'hd':
+ preference += 5
+ formats.append({
+ 'format_id': '%s_%s_%s' % (format_id, quality, src_type),
+ 'url': src,
+ 'preference': preference,
+ })
+ extract_dash_manifest(f[0], formats)
+ subtitles_src = f[0].get('subtitles_src')
+ if subtitles_src:
+ subtitles.setdefault('en', []).append({'url': subtitles_src})
+ if not formats:
+ raise ExtractorError('Cannot find video formats')
+
+ process_formats(formats)
+
+ video_title = self._html_search_regex(
+ r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
+ 'title', default=None)
+ if not video_title:
+ video_title = self._html_search_regex(
+ r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
+ webpage, 'alternative title', default=None)
+ if not video_title:
+ video_title = self._html_search_meta(
+ 'description', webpage, 'title', default=None)
+ if video_title:
+ video_title = limit_length(video_title, 80)
+ else:
+ video_title = 'Facebook video #%s' % video_id
+ uploader = clean_html(get_element_by_id(
+ 'fbPhotoPageAuthorName', webpage)) or self._search_regex(
+ r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
+ default=None) or self._og_search_title(webpage, fatal=False)
+ timestamp = int_or_none(self._search_regex(
+ r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
+ 'timestamp', default=None))
+ thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
+
+ view_count = parse_count(self._search_regex(
+ r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
+ default=None))
+
+ info_dict = {
+ 'id': video_id,
+ 'title': video_title,
+ 'formats': formats,
+ 'uploader': uploader,
+ 'timestamp': timestamp,
+ 'thumbnail': thumbnail,
+ 'view_count': view_count,
+ 'subtitles': subtitles,
+ }
+
+ return info_dict
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
+ return self._extract_from_url(real_url, video_id)
+
+
+class FacebookPluginsVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'
+
+ _TESTS = [{
+ 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560',
+ 'md5': '5954e92cdfe51fe5782ae9bda7058a07',
+ 'info_dict': {
+ 'id': '10154383743583686',
+ 'ext': 'mp4',
+ 'title': 'What to do during the haze?',
+ 'uploader': 'Gov.sg',
+ 'upload_date': '20160826',
+ 'timestamp': 1472184808,
+ },
+ 'add_ie': [FacebookIE.ie_key()],
+ }, {
+ 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result(
+ compat_urllib_parse_unquote(self._match_id(url)),
+ FacebookIE.ie_key())
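
FacebookPluginsVideoIE treats the whole percent-encoded href query value as the id and simply decodes it before delegating to FacebookIE; compat_urllib_parse_unquote is urllib.parse.unquote on Python 3, so the round trip looks like this:

    from urllib.parse import unquote

    unquote('https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F')
    # -> 'https://www.facebook.com/gov.sg/videos/10154383743583686/'
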
diff --git a/hypervideo_dl/extractor/faz.py b/hypervideo_dl/extractor/faz.py
new file mode 100644
index 0000000..312ee2a
--- /dev/null
+++ b/hypervideo_dl/extractor/faz.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_etree_fromstring
+from ..utils import (
+ xpath_element,
+ xpath_text,
+ int_or_none,
+)
+
+
+class FazIE(InfoExtractor):
+ IE_NAME = 'faz.net'
+ _VALID_URL = r'https?://(?:www\.)?faz\.net/(?:[^/]+/)*.*?-(?P<id>\d+)\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
+ 'info_dict': {
+ 'id': '12610585',
+ 'ext': 'mp4',
+ 'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
+ 'description': 'md5:1453fbf9a0d041d985a47306192ea253',
+ },
+ }, {
+ 'url': 'http://www.faz.net/aktuell/politik/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.faz.net/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.faz.net/-13659345.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.faz.net/aktuell/politik/-13659345.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.faz.net/foobarblafasel-13659345.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ description = self._og_search_description(webpage)
+ media = self._html_search_regex(
+ r"data-videojs-media='([^']+)",
+ webpage, 'media')
+ if media == 'extern':
+ perform_url = self._search_regex(
+ r"<iframe[^>]+?src='((?:http:)?//player\.performgroup\.com/eplayer/eplayer\.html#/?[0-9a-f]{26}\.[0-9a-z]{26})",
+ webpage, 'perform url')
+ return self.url_result(perform_url)
+ config = compat_etree_fromstring(media)
+
+ encodings = xpath_element(config, 'ENCODINGS', 'encodings', True)
+ formats = []
+ for pref, code in enumerate(['LOW', 'HIGH', 'HQ']):
+ encoding = xpath_element(encodings, code)
+ if encoding is not None:
+ encoding_url = xpath_text(encoding, 'FILENAME')
+ if encoding_url:
+ tbr = xpath_text(encoding, 'AVERAGEBITRATE', 'average bitrate')
+ if tbr:
+ tbr = int_or_none(tbr.replace(',', '.'))
+ f = {
+ 'url': encoding_url,
+ 'format_id': code.lower(),
+ 'quality': pref,
+ 'tbr': tbr,
+ 'vcodec': xpath_text(encoding, 'CODEC'),
+ }
+ mobj = re.search(r'(\d+)x(\d+)_(\d+)\.mp4', encoding_url)
+ if mobj:
+ f.update({
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ 'tbr': tbr or int(mobj.group(3)),
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'formats': formats,
+ 'description': description.strip() if description else None,
+ 'thumbnail': xpath_text(config, 'STILL/STILL_BIG'),
+ 'duration': int_or_none(xpath_text(config, 'DURATION')),
+ }
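
The regex fallback in FazIE recovers resolution and bitrate straight from the encoding file name when the XML lacks them; worked through on a made-up file name of the expected shape:

    import re

    mobj = re.search(r'(\d+)x(\d+)_(\d+)\.mp4', 'clip_640x360_1200.mp4')
    mobj.groups()  # -> ('640', '360', '1200'): width, height, tbr fallback
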
diff --git a/hypervideo_dl/extractor/fc2.py b/hypervideo_dl/extractor/fc2.py
new file mode 100644
index 0000000..4355611
--- /dev/null
+++ b/hypervideo_dl/extractor/fc2.py
@@ -0,0 +1,160 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_request,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ sanitized_Request,
+ urlencode_postdata,
+)
+
+
+class FC2IE(InfoExtractor):
+ _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
+ IE_NAME = 'fc2'
+ _NETRC_MACHINE = 'fc2'
+ _TESTS = [{
+ 'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
+ 'md5': 'a6ebe8ebe0396518689d963774a54eb7',
+ 'info_dict': {
+ 'id': '20121103kUan1KHs',
+ 'ext': 'flv',
+ 'title': 'Boxing again with Puff',
+ },
+ }, {
+ 'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
+ 'info_dict': {
+ 'id': '20150125cEva0hDn',
+ 'ext': 'mp4',
+ },
+ 'params': {
+ 'username': 'ytdl@yt-dl.org',
+ 'password': '(snip)',
+ },
+ 'skip': 'requires actual password',
+ }, {
+ 'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
+ 'only_matching': True,
+ }]
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None or password is None:
+ return False
+
+ # Log in
+ login_form_strs = {
+ 'email': username,
+ 'password': password,
+ 'done': 'video',
+ 'Submit': ' Login ',
+ }
+
+ login_data = urlencode_postdata(login_form_strs)
+ request = sanitized_Request(
+ 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
+
+ login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
+ if 'mode=redirect&login=done' not in login_results:
+ self.report_warning('unable to log in: bad username or password')
+ return False
+
+ # a second request to this redirect URL is also needed to finalize the session
+ login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
+ self._download_webpage(
+ login_redir, None, note='Login redirect', errnote='Login redirect failed')
+
+ return True
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ self._login()
+ webpage = None
+ if not url.startswith('fc2:'):
+ webpage = self._download_webpage(url, video_id)
+ self._downloader.cookiejar.clear_session_cookies() # must clear
+ self._login()
+
+ title = 'FC2 video %s' % video_id
+ thumbnail = None
+ if webpage is not None:
+ title = self._og_search_title(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+ refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url
+
+ mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
+
+ info_url = (
+ 'http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&'.
+ format(video_id, mimi, compat_urllib_request.quote(refer, safe=b'').replace('.', '%2E')))
+
+ info_webpage = self._download_webpage(
+ info_url, video_id, note='Downloading info page')
+ info = compat_urlparse.parse_qs(info_webpage)
+
+ if 'err_code' in info:
+ # most of the time we can still download the video even if err_code is 403 or 602
+ self.report_warning(
+ 'Error code was: %s... but still trying' % info['err_code'][0])
+
+ if 'filepath' not in info:
+ raise ExtractorError('Cannot download file. Are you logged in?')
+
+ video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
+ title_info = info.get('title')
+ if title_info:
+ title = title_info[0]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'ext': 'flv',
+ 'thumbnail': thumbnail,
+ }
+
+
+class FC2EmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)'
+ IE_NAME = 'fc2:embed'
+
+ _TEST = {
+ 'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】',
+ 'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a',
+ 'info_dict': {
+ 'id': '201403223kCqB3Ez',
+ 'ext': 'flv',
+ 'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ query = compat_parse_qs(mobj.group('query'))
+
+ video_id = query['i'][-1]
+ title = query.get('tl', ['FC2 video %s' % video_id])[0]
+
+ sj = query.get('sj', [None])[0]
+ thumbnail = None
+ if sj:
+ # See thumbnailImagePath() in ServerConst.as of flv2.swf
+ thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % (
+ sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id)))
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': FC2IE.ie_key(),
+ 'url': 'fc2:%s' % video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ }
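
The FC2EmbedIE thumbnail path is built purely by slicing the video id; worked through with the id from the test case above:

    video_id = '201403223kCqB3Ez'
    '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id))
    # -> '201403/22/E/z/201403223kCqB3Ez'
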
diff --git a/hypervideo_dl/extractor/fczenit.py b/hypervideo_dl/extractor/fczenit.py
new file mode 100644
index 0000000..8db7c59
--- /dev/null
+++ b/hypervideo_dl/extractor/fczenit.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ float_or_none,
+)
+
+
+class FczenitIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://fc-zenit.ru/video/41044/',
+ 'md5': '0e3fab421b455e970fa1aa3891e57df0',
+ 'info_dict': {
+ 'id': '41044',
+ 'ext': 'mp4',
+ 'title': 'Так пишется история: казанский разгром ЦСКА на «Зенит-ТВ»',
+ 'timestamp': 1462283735,
+ 'upload_date': '20160503',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ msi_id = self._search_regex(
+ r"(?s)config\s*=\s*{.+?video_id\s*:\s*'([^']+)'", webpage, 'msi id')
+
+ msi_data = self._download_json(
+ 'http://player.fc-zenit.ru/msi/video', msi_id, query={
+ 'video': msi_id,
+ })['data']
+ title = msi_data['name']
+
+ formats = [{
+ 'format_id': q.get('label'),
+ 'url': q['url'],
+ 'height': int_or_none(q.get('label')),
+ } for q in msi_data['qualities'] if q.get('url')]
+
+ self._sort_formats(formats)
+
+ tags = [tag['label'] for tag in msi_data.get('tags', []) if tag.get('label')]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': msi_data.get('preview'),
+ 'formats': formats,
+ 'duration': float_or_none(msi_data.get('duration')),
+ 'timestamp': int_or_none(msi_data.get('date')),
+ 'tags': tags,
+ }
diff --git a/hypervideo_dl/extractor/filmon.py b/hypervideo_dl/extractor/filmon.py
new file mode 100644
index 0000000..f775fe0
--- /dev/null
+++ b/hypervideo_dl/extractor/filmon.py
@@ -0,0 +1,178 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_HTTPError,
+)
+from ..utils import (
+ qualities,
+ strip_or_none,
+ int_or_none,
+ ExtractorError,
+)
+
+
+class FilmOnIE(InfoExtractor):
+ IE_NAME = 'filmon'
+ _VALID_URL = r'(?:https?://(?:www\.)?filmon\.com/vod/view/|filmon:)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space',
+ 'info_dict': {
+ 'id': '24869',
+ 'ext': 'mp4',
+ 'title': 'Plan 9 From Outer Space',
+ 'description': 'Dead human, zombies and vampires',
+ },
+ }, {
+ 'url': 'https://www.filmon.com/vod/view/2825-1-popeye-series-1',
+ 'info_dict': {
+ 'id': '2825',
+ 'title': 'Popeye Series 1',
+ 'description': 'The original series of Popeye.',
+ },
+ 'playlist_mincount': 8,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ try:
+ response = self._download_json(
+ 'https://www.filmon.com/api/vod/movie?id=%s' % video_id,
+ video_id)['response']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason']
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
+ raise
+
+ title = response['title']
+ description = strip_or_none(response.get('description'))
+
+ if response.get('type_id') == 1:
+ entries = [self.url_result('filmon:' + episode_id) for episode_id in response.get('episodes', [])]
+ return self.playlist_result(entries, video_id, title, description)
+
+ QUALITY = qualities(('low', 'high'))
+ formats = []
+ for format_id, stream in response.get('streams', {}).items():
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': stream_url,
+ 'ext': 'mp4',
+ 'quality': QUALITY(stream.get('quality')),
+ 'protocol': 'm3u8_native',
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ poster = response.get('poster', {})
+ thumbs = poster.get('thumbs', {})
+ thumbs['poster'] = poster
+ for thumb_id, thumb in thumbs.items():
+ thumb_url = thumb.get('url')
+ if not thumb_url:
+ continue
+ thumbnails.append({
+ 'id': thumb_id,
+ 'url': thumb_url,
+ 'width': int_or_none(thumb.get('width')),
+ 'height': int_or_none(thumb.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'thumbnails': thumbnails,
+ }
+
+
+class FilmOnChannelIE(InfoExtractor):
+ IE_NAME = 'filmon:channel'
+ _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P<id>[a-z0-9-]+)'
+ _TESTS = [{
+ # VOD
+ 'url': 'http://www.filmon.com/tv/sports-haters',
+ 'info_dict': {
+ 'id': '4190',
+ 'ext': 'mp4',
+ 'title': 'Sports Haters',
+ 'description': 'md5:dabcb4c1d9cfc77085612f1a85f8275d',
+ },
+ }, {
+ # LIVE
+ 'url': 'https://www.filmon.com/channel/filmon-sports',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.filmon.com/tv/2894',
+ 'only_matching': True,
+ }]
+
+ _THUMBNAIL_RES = [
+ ('logo', 56, 28),
+ ('big_logo', 106, 106),
+ ('extra_big_logo', 300, 300),
+ ]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ try:
+ channel_data = self._download_json(
+ 'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message']
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
+ raise
+
+ channel_id = compat_str(channel_data['id'])
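+ # Liveness is inferred from the absence of VOD/VOX flags; this mirrors
+ # what the channel API exposes rather than a documented contract.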
+ is_live = not channel_data.get('is_vod') and not channel_data.get('is_vox')
+ title = channel_data['title']
+
+ QUALITY = qualities(('low', 'high'))
+ formats = []
+ for stream in channel_data.get('streams', []):
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
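+ # VOD channel streams are Wowza assets; live streams are plain m3u8
+ # URLs handled below.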
+ if not is_live:
+ formats.extend(self._extract_wowza_formats(
+ stream_url, channel_id, skip_protocols=['dash', 'rtmp', 'rtsp']))
+ continue
+ quality = stream.get('quality')
+ formats.append({
+ 'format_id': quality,
+ # This is an m3u8 stream, but _extract_m3u8_formats is deliberately
+ # not used because the playlist has no bitrate variants anyway
+ 'url': stream_url,
+ 'ext': 'mp4',
+ 'quality': QUALITY(quality),
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for name, width, height in self._THUMBNAIL_RES:
+ thumbnails.append({
+ 'id': name,
+ 'url': 'http://static.filmon.com/assets/channels/%s/%s.png' % (channel_id, name),
+ 'width': width,
+ 'height': height,
+ })
+
+ return {
+ 'id': channel_id,
+ 'display_id': channel_data.get('alias'),
+ 'title': self._live_title(title) if is_live else title,
+ 'description': channel_data.get('description'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'is_live': is_live,
+ }
diff --git a/hypervideo_dl/extractor/filmweb.py b/hypervideo_dl/extractor/filmweb.py
new file mode 100644
index 0000000..56000bc
--- /dev/null
+++ b/hypervideo_dl/extractor/filmweb.py
@@ -0,0 +1,42 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class FilmwebIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?filmweb\.no/(?P<type>trailere|filmnytt)/article(?P<id>\d+)\.ece'
+ _TEST = {
+ 'url': 'http://www.filmweb.no/trailere/article1264921.ece',
+ 'md5': 'e353f47df98e557d67edaceda9dece89',
+ 'info_dict': {
+ 'id': '13033574',
+ 'ext': 'mp4',
+ 'title': 'Det som en gang var',
+ 'upload_date': '20160316',
+ 'timestamp': 1458140101,
+ 'uploader_id': '12639966',
+ 'uploader': 'Live Roaldset',
+ }
+ }
+
+ def _real_extract(self, url):
+ article_type, article_id = re.match(self._VALID_URL, url).groups()
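+ # filmnytt (news) articles embed a trailer player; resolve the actual
+ # trailer article id from the data-videoid attribute first.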
+ if article_type == 'filmnytt':
+ webpage = self._download_webpage(url, article_id)
+ article_id = self._search_regex(r'data-videoid="(\d+)"', webpage, 'article id')
+ embed_code = self._download_json(
+ 'https://www.filmweb.no/template_v2/ajax/json_trailerEmbed.jsp',
+ article_id, query={
+ 'articleId': article_id,
+ })['embedCode']
+ iframe_url = self._proto_relative_url(self._search_regex(
+ r'<iframe[^>]+src="([^"]+)', embed_code, 'iframe url'))
+
+ return {
+ '_type': 'url_transparent',
+ 'id': article_id,
+ 'url': iframe_url,
+ 'ie_key': 'TwentyThreeVideo',
+ }
diff --git a/hypervideo_dl/extractor/firsttv.py b/hypervideo_dl/extractor/firsttv.py
new file mode 100644
index 0000000..28617d8
--- /dev/null
+++ b/hypervideo_dl/extractor/firsttv.py
@@ -0,0 +1,156 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ int_or_none,
+ qualities,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class FirstTVIE(InfoExtractor):
+ IE_NAME = '1tv'
+ IE_DESC = 'Первый канал'
+ _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)'
+
+ _TESTS = [{
+ # single format
+ 'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015',
+ 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
+ 'info_dict': {
+ 'id': '40049',
+ 'ext': 'mp4',
+ 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
+ 'upload_date': '20150212',
+ 'duration': 2694,
+ },
+ }, {
+ # multiple formats
+ 'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016',
+ 'info_dict': {
+ 'id': '364746',
+ 'ext': 'mp4',
+ 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
+ 'upload_date': '20160407',
+ 'duration': 179,
+ 'formats': 'mincount:3',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.1tv.ru/news/issue/2016-12-01/14:00',
+ 'info_dict': {
+ 'id': '14:00',
+ 'title': 'Выпуск новостей в 14:00 1 декабря 2016 года. Новости. Первый канал',
+ 'description': 'md5:2e921b948f8c1ff93901da78ebdb1dfd',
+ },
+ 'playlist_count': 13,
+ }, {
+ 'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+ playlist_url = compat_urlparse.urljoin(url, self._search_regex(
+ r'data-playlist-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'playlist url', group='url'))
+
+ parsed_url = compat_urlparse.urlparse(playlist_url)
+ qs = compat_urlparse.parse_qs(parsed_url.query)
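+ # The playlist URL carries the wanted item ids as repeated query
+ # parameters; news pages use news_ids[] instead of videos_ids[].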
+ item_ids = qs.get('videos_ids[]') or qs.get('news_ids[]')
+
+ items = self._download_json(playlist_url, display_id)
+
+ if item_ids:
+ items = [
+ item for item in items
+ if item.get('uid') and compat_str(item['uid']) in item_ids]
+ else:
+ items = [items[0]]
+
+ entries = []
+ QUALITIES = ('ld', 'sd', 'hd', )
+
+ for item in items:
+ title = item['title']
+ quality = qualities(QUALITIES)
+ formats = []
+ path = None
+ for f in item.get('mbr', []):
+ src = url_or_none(f.get('src'))
+ if not src:
+ continue
+ tbr = int_or_none(self._search_regex(
+ r'_(\d{3,})\.mp4', src, 'tbr', default=None))
+ if not path:
+ path = self._search_regex(
+ r'//[^/]+/(.+?)_\d+\.mp4', src,
+ 'm3u8 path', default=None)
+ formats.append({
+ 'url': src,
+ 'format_id': f.get('name'),
+ 'tbr': tbr,
+ 'source_preference': quality(f.get('name')),
+ # quality metadata of http formats may be incorrect
+ 'preference': -1,
+ })
+ # The m3u8 URL format is reverse engineered from [1] (search for
+ # master.m3u8). dashEdges (currently balancer-vod.1tv.ru) is taken
+ # from [2].
+ # 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted
+ # 2. http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834
+ if not path and len(formats) == 1:
+ path = self._search_regex(
+ r'//[^/]+/(.+?$)', formats[0]['url'],
+ 'm3u8 path', default=None)
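+ # The urlset path encodes the available bitrates: a single format uses
+ # a bare ',' while multiple formats list the sorted tbr values between
+ # '_,' and ',.mp4' (reverse engineered, see the references above).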
+ if path:
+ if len(formats) == 1:
+ m3u8_path = ','
+ else:
+ tbrs = [compat_str(t) for t in sorted(f['tbr'] for f in formats)]
+ m3u8_path = '_,%s,%s' % (','.join(tbrs), '.mp4')
+ formats.extend(self._extract_m3u8_formats(
+ 'http://balancer-vod.1tv.ru/%s%s.urlset/master.m3u8'
+ % (path, m3u8_path),
+ display_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)
+ duration = int_or_none(item.get('duration') or self._html_search_meta(
+ 'video:duration', webpage, 'video duration', fatal=False))
+ upload_date = unified_strdate(self._html_search_meta(
+ 'ya:ovs:upload_date', webpage, 'upload date', default=None))
+
+ entries.append({
+ 'id': compat_str(item.get('id') or item['uid']),
+ 'thumbnail': thumbnail,
+ 'title': title,
+ 'upload_date': upload_date,
+ 'duration': int_or_none(duration),
+ 'formats': formats
+ })
+
+ title = self._html_search_regex(
+ (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
+ r"'title'\s*:\s*'([^']+)'"),
+ webpage, 'title', default=None) or self._og_search_title(
+ webpage, default=None)
+ description = self._html_search_regex(
+ r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
+ webpage, 'description', default=None) or self._html_search_meta(
+ 'description', webpage, 'description', default=None)
+
+ return self.playlist_result(entries, display_id, title, description)
diff --git a/hypervideo_dl/extractor/fivemin.py b/hypervideo_dl/extractor/fivemin.py
new file mode 100644
index 0000000..f3f876e
--- /dev/null
+++ b/hypervideo_dl/extractor/fivemin.py
@@ -0,0 +1,54 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class FiveMinIE(InfoExtractor):
+ IE_NAME = '5min'
+ _VALID_URL = r'(?:5min:|https?://(?:[^/]*?5min\.com/|delivery\.vidible\.tv/aol)(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P<id>\d+)'
+
+ _TESTS = [
+ {
+ # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
+ 'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
+ 'md5': '4f7b0b79bf1a470e5004f7112385941d',
+ 'info_dict': {
+ 'id': '518013791',
+ 'ext': 'mp4',
+ 'title': 'iPad Mini with Retina Display Review',
+ 'description': 'iPad mini with Retina Display review',
+ 'duration': 177,
+ 'uploader': 'engadget',
+ 'upload_date': '20131115',
+ 'timestamp': 1384515288,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ },
+ {
+ # From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
+ 'url': '5min:518086247',
+ 'md5': 'e539a9dd682c288ef5a498898009f69e',
+ 'info_dict': {
+ 'id': '518086247',
+ 'ext': 'mp4',
+ 'title': 'How to Make a Next-Level Fruit Salad',
+ 'duration': 184,
+ },
+ 'skip': 'no longer available',
+ },
+ {
+ 'url': 'http://embed.5min.com/518726732/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://delivery.vidible.tv/aol?playList=518013791',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
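+ # 5min content is served through AOL's video platform, so extraction
+ # is delegated to the Aol extractor via the aol-video: scheme.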
+ return self.url_result('aol-video:%s' % video_id)
diff --git a/hypervideo_dl/extractor/fivetv.py b/hypervideo_dl/extractor/fivetv.py
new file mode 100644
index 0000000..c4c0f1b
--- /dev/null
+++ b/hypervideo_dl/extractor/fivetv.py
@@ -0,0 +1,91 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class FiveTVIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?5-tv\.ru/
+ (?:
+ (?:[^/]+/)+(?P<id>\d+)|
+ (?P<path>[^/?#]+)(?:[/?#])?
+ )
+ '''
+
+ _TESTS = [{
+ 'url': 'http://5-tv.ru/news/96814/',
+ 'md5': 'bbff554ad415ecf5416a2f48c22d9283',
+ 'info_dict': {
+ 'id': '96814',
+ 'ext': 'mp4',
+ 'title': 'Россияне выбрали имя для общенациональной платежной системы',
+ 'description': 'md5:a8aa13e2b7ad36789e9f77a74b6de660',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 180,
+ },
+ }, {
+ 'url': 'http://5-tv.ru/video/1021729/',
+ 'info_dict': {
+ 'id': '1021729',
+ 'ext': 'mp4',
+ 'title': '3D принтер',
+ 'description': 'md5:d76c736d29ef7ec5c0cf7d7c65ffcb41',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 180,
+ },
+ }, {
+ # redirect to https://www.5-tv.ru/projects/1000095/izvestia-glavnoe/
+ 'url': 'http://www.5-tv.ru/glavnoe/#itemDetails',
+ 'info_dict': {
+ 'id': 'glavnoe',
+ 'ext': 'mp4',
+ 'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'redirect to «Известия. Главное» project page',
+ }, {
+ 'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://5-tv.ru/films/1507502/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://5-tv.ru/programs/broadcast/508713/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://5-tv.ru/angel/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.5-tv.ru/schedule/?iframe=true&width=900&height=450',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('path')
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(
+ [r'<div[^>]+?class="(?:flow)?player[^>]+?data-href="([^"]+)"',
+ r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
+ webpage, 'video url')
+
+ title = self._og_search_title(webpage, default=None) or self._search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title')
+ duration = int_or_none(self._og_search_property(
+ 'video:duration', webpage, 'duration', default=None))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'duration': duration,
+ }
diff --git a/hypervideo_dl/extractor/flickr.py b/hypervideo_dl/extractor/flickr.py
new file mode 100644
index 0000000..9f166ef
--- /dev/null
+++ b/hypervideo_dl/extractor/flickr.py
@@ -0,0 +1,116 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_urlencode,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ qualities,
+)
+
+
+class FlickrIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/[\w\-_@]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
+ 'md5': '164fe3fa6c22e18d448d4d5af2330f31',
+ 'info_dict': {
+ 'id': '5645318632',
+ 'ext': 'mpg',
+ 'description': 'Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.',
+ 'title': 'Dark Hollow Waterfalls',
+ 'duration': 19,
+ 'timestamp': 1303528740,
+ 'upload_date': '20110423',
+ 'uploader_id': '10922353@N03',
+ 'uploader': 'Forest Wander',
+ 'uploader_url': 'https://www.flickr.com/photos/forestwander-nature-pictures/',
+ 'comment_count': int,
+ 'view_count': int,
+ 'tags': list,
+ 'license': 'Attribution-ShareAlike',
+ }
+ }
+ _API_BASE_URL = 'https://api.flickr.com/services/rest?'
+ # https://help.yahoo.com/kb/flickr/SLN25525.html
+ _LICENSES = {
+ '0': 'All Rights Reserved',
+ '1': 'Attribution-NonCommercial-ShareAlike',
+ '2': 'Attribution-NonCommercial',
+ '3': 'Attribution-NonCommercial-NoDerivs',
+ '4': 'Attribution',
+ '5': 'Attribution-ShareAlike',
+ '6': 'Attribution-NoDerivs',
+ '7': 'No known copyright restrictions',
+ '8': 'United States government work',
+ '9': 'Public Domain Dedication (CC0)',
+ '10': 'Public Domain Work',
+ }
+
+ def _call_api(self, method, video_id, api_key, note, secret=None):
+ query = {
+ 'photo_id': video_id,
+ 'method': 'flickr.%s' % method,
+ 'api_key': api_key,
+ 'format': 'json',
+ 'nojsoncallback': 1,
+ }
+ if secret:
+ query['secret'] = secret
+ data = self._download_json(self._API_BASE_URL + compat_urllib_parse_urlencode(query), video_id, note)
+ if data['stat'] != 'ok':
+ raise ExtractorError(data['message'])
+ return data
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
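+ # The hermes error beacon response exposes a site_key that doubles as
+ # an API key for the public REST endpoints (observed, not documented).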
+ api_key = self._download_json(
+ 'https://www.flickr.com/hermes_error_beacon.gne', video_id,
+ 'Downloading api key')['site_key']
+
+ video_info = self._call_api(
+ 'photos.getInfo', video_id, api_key, 'Downloading video info')['photo']
+ if video_info['media'] == 'video':
+ streams = self._call_api(
+ 'video.getStreamInfo', video_id, api_key,
+ 'Downloading streams info', video_info['secret'])['streams']
+
+ preference = qualities(
+ ['288p', 'iphone_wifi', '100', '300', '700', '360p', 'appletv', '720p', '1080p', 'orig'])
+
+ formats = []
+ for stream in streams['stream']:
+ stream_type = compat_str(stream.get('type'))
+ formats.append({
+ 'format_id': stream_type,
+ 'url': stream['_content'],
+ 'preference': preference(stream_type),
+ })
+ self._sort_formats(formats)
+
+ owner = video_info.get('owner', {})
+ uploader_id = owner.get('nsid')
+ uploader_path = owner.get('path_alias') or uploader_id
+ uploader_url = 'https://www.flickr.com/photos/%s/' % uploader_path if uploader_path else None
+
+ return {
+ 'id': video_id,
+ 'title': video_info['title']['_content'],
+ 'description': video_info.get('description', {}).get('_content'),
+ 'formats': formats,
+ 'timestamp': int_or_none(video_info.get('dateuploaded')),
+ 'duration': int_or_none(video_info.get('video', {}).get('duration')),
+ 'uploader_id': uploader_id,
+ 'uploader': owner.get('realname'),
+ 'uploader_url': uploader_url,
+ 'comment_count': int_or_none(video_info.get('comments', {}).get('_content')),
+ 'view_count': int_or_none(video_info.get('views')),
+ 'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])],
+ 'license': self._LICENSES.get(video_info.get('license')),
+ }
+ else:
+ raise ExtractorError('not a video', expected=True)
diff --git a/hypervideo_dl/extractor/folketinget.py b/hypervideo_dl/extractor/folketinget.py
new file mode 100644
index 0000000..b3df93f
--- /dev/null
+++ b/hypervideo_dl/extractor/folketinget.py
@@ -0,0 +1,77 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ xpath_text,
+)
+
+
+class FolketingetIE(InfoExtractor):
+ IE_DESC = 'Folketinget (ft.dk; Danish parliament)'
+ _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
+ _TEST = {
+ 'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
+ 'md5': '6269e8626fa1a891bf5369b386ae996a',
+ 'info_dict': {
+ 'id': '1165642',
+ 'ext': 'mp4',
+ 'title': 'Åbent samråd i Erhvervsudvalget',
+ 'description': 'Åbent samråd med erhvervs- og vækstministeren om regeringens politik på teleområdet',
+ 'view_count': int,
+ 'width': 768,
+ 'height': 432,
+ 'tbr': 928000,
+ 'timestamp': 1416493800,
+ 'upload_date': '20141120',
+ 'duration': 3960,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(webpage)
+ description = self._html_search_regex(
+ r'(?s)<div class="video-item-agenda"[^>]*>(.*?)<',
+ webpage, 'description', fatal=False)
+
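+ # The Flash embed's query string carries the player configuration,
+ # including the URL of the XML document describing the streams.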
+ player_params = compat_parse_qs(self._search_regex(
+ r'<embed src="http://ft\.arkena\.tv/flash/ftplayer\.swf\?([^"]+)"',
+ webpage, 'player params'))
+ xml_url = player_params['xml'][0]
+ doc = self._download_xml(xml_url, video_id)
+
+ timestamp = parse_iso8601(xpath_text(doc, './/date'))
+ duration = parse_duration(xpath_text(doc, './/duration'))
+ width = int_or_none(xpath_text(doc, './/width'))
+ height = int_or_none(xpath_text(doc, './/height'))
+ view_count = int_or_none(xpath_text(doc, './/views'))
+
+ formats = [{
+ 'format_id': n.attrib['bitrate'],
+ 'url': xpath_text(n, './url', fatal=True),
+ 'tbr': int_or_none(n.attrib['bitrate']),
+ } for n in doc.findall('.//streams/stream')]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'width': width,
+ 'height': height,
+ 'duration': duration,
+ 'view_count': view_count,
+ }
diff --git a/hypervideo_dl/extractor/footyroom.py b/hypervideo_dl/extractor/footyroom.py
new file mode 100644
index 0000000..118325b
--- /dev/null
+++ b/hypervideo_dl/extractor/footyroom.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .streamable import StreamableIE
+
+
+class FootyRoomIE(InfoExtractor):
+ _VALID_URL = r'https?://footyroom\.com/matches/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://footyroom.com/matches/79922154/hull-city-vs-chelsea/review',
+ 'info_dict': {
+ 'id': '79922154',
+ 'title': 'VIDEO Hull City 0 - 2 Chelsea',
+ },
+ 'playlist_count': 2,
+ 'add_ie': [StreamableIE.ie_key()],
+ }, {
+ 'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review',
+ 'info_dict': {
+ 'id': '75817984',
+ 'title': 'VIDEO Georgia 0 - 2 Germany',
+ },
+ 'playlist_count': 1,
+ 'add_ie': ['Playwire']
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
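+ # Match media is exposed as a JSON blob assigned to DataStore.media;
+ # each entry's payload embeds either a Playwire config or a Streamable
+ # player.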
+ playlist = self._parse_json(self._search_regex(
+ r'DataStore\.media\s*=\s*([^;]+)', webpage, 'media data'),
+ playlist_id)
+
+ playlist_title = self._og_search_title(webpage)
+
+ entries = []
+ for video in playlist:
+ payload = video.get('payload')
+ if not payload:
+ continue
+ playwire_url = self._html_search_regex(
+ r'data-config="([^"]+)"', payload,
+ 'playwire url', default=None)
+ if playwire_url:
+ entries.append(self.url_result(self._proto_relative_url(
+ playwire_url, 'http:'), 'Playwire'))
+
+ streamable_url = StreamableIE._extract_url(payload)
+ if streamable_url:
+ entries.append(self.url_result(
+ streamable_url, StreamableIE.ie_key()))
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/hypervideo_dl/extractor/formula1.py b/hypervideo_dl/extractor/formula1.py
new file mode 100644
index 0000000..67662e6
--- /dev/null
+++ b/hypervideo_dl/extractor/formula1.py
@@ -0,0 +1,27 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class Formula1IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?formula1\.com/en/latest/video\.[^.]+\.(?P<id>\d+)\.html'
+ _TEST = {
+ 'url': 'https://www.formula1.com/en/latest/video.race-highlights-spain-2016.6060988138001.html',
+ 'md5': 'be7d3a8c2f804eb2ab2aa5d941c359f8',
+ 'info_dict': {
+ 'id': '6060988138001',
+ 'ext': 'mp4',
+ 'title': 'Race highlights - Spain 2016',
+ 'timestamp': 1463332814,
+ 'upload_date': '20160515',
+ 'uploader_id': '6057949432001',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6057949432001/S1WMrhjlh_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ bc_id = self._match_id(url)
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % bc_id, 'BrightcoveNew', bc_id)
diff --git a/hypervideo_dl/extractor/fourtube.py b/hypervideo_dl/extractor/fourtube.py
new file mode 100644
index 0000000..be4e813
--- /dev/null
+++ b/hypervideo_dl/extractor/fourtube.py
@@ -0,0 +1,309 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_str,
+ compat_urllib_parse_unquote,
+ compat_urlparse,
+)
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ str_or_none,
+ str_to_int,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class FourTubeBaseIE(InfoExtractor):
+ def _extract_formats(self, url, video_id, media_id, sources):
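+ # The token endpoint returns a short-lived tokenized media URL per
+ # requested quality; an empty POST body with matching Origin and
+ # Referer headers appears to be required.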
+ token_url = 'https://%s/%s/desktop/%s' % (
+ self._TKN_HOST, media_id, '+'.join(sources))
+
+ parsed_url = compat_urlparse.urlparse(url)
+ tokens = self._download_json(token_url, video_id, data=b'', headers={
+ 'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
+ 'Referer': url,
+ })
+ formats = [{
+ 'url': tokens[res]['token'],
+ 'format_id': res + 'p',
+ 'resolution': res + 'p',
+ 'quality': int(res),
+ } for res in sources]
+ self._sort_formats(formats)
+ return formats
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
+
+ if kind == 'm' or not display_id:
+ url = self._URL_TEMPLATE % video_id
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_meta('name', webpage)
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'uploadDate', webpage))
+ thumbnail = self._html_search_meta('thumbnailUrl', webpage)
+ uploader_id = self._html_search_regex(
+ r'<a class="item-to-subscribe" href="[^"]+/(?:channel|user)s?/([^/"]+)" title="Go to [^"]+ page">',
+ webpage, 'uploader id', fatal=False)
+ uploader = self._html_search_regex(
+ r'<a class="item-to-subscribe" href="[^"]+/(?:channel|user)s?/[^/"]+" title="Go to ([^"]+) page">',
+ webpage, 'uploader', fatal=False)
+
+ categories_html = self._search_regex(
+ r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="[^"]*?list[^"]*?">(.*?)</ul>',
+ webpage, 'categories', fatal=False)
+ categories = None
+ if categories_html:
+ categories = [
+ c.strip() for c in re.findall(
+ r'(?s)<li><a.*?>(.*?)</a>', categories_html)]
+
+ view_count = str_to_int(self._search_regex(
+ r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">',
+ webpage, 'view count', default=None))
+ like_count = str_to_int(self._search_regex(
+ r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">',
+ webpage, 'like count', default=None))
+ duration = parse_duration(self._html_search_meta('duration', webpage))
+
+ media_id = self._search_regex(
+ r'<button[^>]+data-id=(["\'])(?P<id>\d+)\1[^>]+data-quality=', webpage,
+ 'media id', default=None, group='id')
+ sources = [
+ quality
+ for _, quality in re.findall(r'<button[^>]+data-quality=(["\'])(.+?)\1', webpage)]
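+ # If the inline player markup is missing, fall back to the player JS:
+ # its initialization call carries the media id and quality list.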
+ if not (media_id and sources):
+ player_js = self._download_webpage(
+ self._search_regex(
+ r'<script[^>]id=(["\'])playerembed\1[^>]+src=(["\'])(?P<url>.+?)\2',
+ webpage, 'player JS', group='url'),
+ video_id, 'Downloading player JS')
+ params_js = self._search_regex(
+ r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)',
+ player_js, 'initialization parameters')
+ params = self._parse_json('[%s]' % params_js, video_id)
+ media_id = params[0]
+ sources = ['%s' % p for p in params[2]]
+
+ formats = self._extract_formats(url, video_id, media_id, sources)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'categories': categories,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'timestamp': timestamp,
+ 'like_count': like_count,
+ 'view_count': view_count,
+ 'duration': duration,
+ 'age_limit': 18,
+ }
+
+
+class FourTubeIE(FourTubeBaseIE):
+ IE_NAME = '4tube'
+ _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?4tube\.com/(?:videos|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video'
+ _TKN_HOST = 'token.4tube.com'
+ _TESTS = [{
+ 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
+ 'md5': '6516c8ac63b03de06bc8eac14362db4f',
+ 'info_dict': {
+ 'id': '209733',
+ 'ext': 'mp4',
+ 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
+ 'uploader': 'WCP Club',
+ 'uploader_id': 'wcp-club',
+ 'upload_date': '20131031',
+ 'timestamp': 1383263892,
+ 'duration': 583,
+ 'view_count': int,
+ 'like_count': int,
+ 'categories': list,
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'http://www.4tube.com/embed/209733',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
+ 'only_matching': True,
+ }]
+
+
+class FuxIE(FourTubeBaseIE):
+ _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?fux\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _URL_TEMPLATE = 'https://www.fux.com/video/%s/video'
+ _TKN_HOST = 'token.fux.com'
+ _TESTS = [{
+ 'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
+ 'info_dict': {
+ 'id': '195359',
+ 'ext': 'mp4',
+ 'title': 'Awesome fucking in the kitchen ends with cum swallow',
+ 'uploader': 'alenci2342',
+ 'uploader_id': 'alenci2342',
+ 'upload_date': '20131230',
+ 'timestamp': 1388361660,
+ 'duration': 289,
+ 'view_count': int,
+ 'like_count': int,
+ 'categories': list,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.fux.com/embed/195359',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
+ 'only_matching': True,
+ }]
+
+
+class PornTubeIE(FourTubeBaseIE):
+ _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
+ _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s'
+ _TKN_HOST = 'tkn.porntube.com'
+ _TESTS = [{
+ 'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759',
+ 'info_dict': {
+ 'id': '7089759',
+ 'ext': 'mp4',
+ 'title': 'Teen couple doing anal',
+ 'uploader': 'Alexy',
+ 'uploader_id': '91488',
+ 'upload_date': '20150606',
+ 'timestamp': 1433595647,
+ 'duration': 5052,
+ 'view_count': int,
+ 'like_count': int,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.porntube.com/videos/squirting-teen-ballerina-ecg_1331406',
+ 'info_dict': {
+ 'id': '1331406',
+ 'ext': 'mp4',
+ 'title': 'Squirting Teen Ballerina on ECG',
+ 'uploader': 'Exploited College Girls',
+ 'uploader_id': '665',
+ 'channel': 'Exploited College Girls',
+ 'channel_id': '665',
+ 'upload_date': '20130920',
+ 'timestamp': 1379685485,
+ 'duration': 851,
+ 'view_count': int,
+ 'like_count': int,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.porntube.com/embed/7089759',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.porntube.com/videos/teen-couple-doing-anal_7089759',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id, display_id = mobj.group('id', 'display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
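+ # INITIALSTATE is a URL-encoded, base64-wrapped JSON blob; unwrap it
+ # before parsing to reach the page's video object.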
+ video = self._parse_json(
+ self._search_regex(
+ r'INITIALSTATE\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'data', group='value'), video_id,
+ transform_source=lambda x: compat_urllib_parse_unquote(
+ compat_b64decode(x).decode('utf-8')))['page']['video']
+
+ title = video['title']
+ media_id = video['mediaId']
+ sources = [compat_str(e['height'])
+ for e in video['encodings'] if e.get('height')]
+ formats = self._extract_formats(url, video_id, media_id, sources)
+
+ thumbnail = url_or_none(video.get('masterThumb'))
+ uploader = try_get(video, lambda x: x['user']['username'], compat_str)
+ uploader_id = str_or_none(try_get(
+ video, lambda x: x['user']['id'], int))
+ channel = try_get(video, lambda x: x['channel']['name'], compat_str)
+ channel_id = str_or_none(try_get(
+ video, lambda x: x['channel']['id'], int))
+ like_count = int_or_none(video.get('likes'))
+ dislike_count = int_or_none(video.get('dislikes'))
+ view_count = int_or_none(video.get('playsQty'))
+ duration = int_or_none(video.get('durationInSeconds'))
+ timestamp = unified_timestamp(video.get('publishedAt'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader or channel,
+ 'uploader_id': uploader_id or channel_id,
+ 'channel': channel,
+ 'channel_id': channel_id,
+ 'timestamp': timestamp,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'view_count': view_count,
+ 'duration': duration,
+ 'age_limit': 18,
+ }
+
+
+class PornerBrosIE(FourTubeBaseIE):
+ _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
+ _URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s'
+ _TKN_HOST = 'token.pornerbros.com'
+ _TESTS = [{
+ 'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
+ 'md5': '6516c8ac63b03de06bc8eac14362db4f',
+ 'info_dict': {
+ 'id': '181369',
+ 'ext': 'mp4',
+ 'title': 'Skinny brunette takes big cock down her anal hole',
+ 'uploader': 'PornerBros HD',
+ 'uploader_id': 'pornerbros-hd',
+ 'upload_date': '20130130',
+ 'timestamp': 1359527401,
+ 'duration': 1224,
+ 'view_count': int,
+ 'categories': list,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.pornerbros.com/embed/181369',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
+ 'only_matching': True,
+ }]
diff --git a/hypervideo_dl/extractor/fox.py b/hypervideo_dl/extractor/fox.py
new file mode 100644
index 0000000..04f4bdb
--- /dev/null
+++ b/hypervideo_dl/extractor/fox.py
@@ -0,0 +1,150 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import uuid
+
+from .adobepass import AdobePassIE
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+ parse_duration,
+ try_get,
+ unified_timestamp,
+)
+
+
+class FOXIE(AdobePassIE):
+ _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
+ _TESTS = [{
+ # clip
+ 'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
+ 'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
+ 'info_dict': {
+ 'id': '4b765a60490325103ea69888fb2bd4e8',
+ 'ext': 'mp4',
+ 'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
+ 'description': 'md5:549cd9c70d413adb32ce2a779b53b486',
+ 'duration': 102,
+ 'timestamp': 1504291893,
+ 'upload_date': '20170901',
+ 'creator': 'FOX',
+ 'series': 'Gotham',
+ 'age_limit': 14,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # episode, geo-restricted
+ 'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/',
+ 'only_matching': True,
+ }, {
+ # episode, geo-restricted, tv provider required
+ 'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+ _HOME_PAGE_URL = 'https://www.fox.com/'
+ _API_KEY = 'abdcbed02c124d393b39e818a4312055'
+ _access_token = None
+
+ def _call_api(self, path, video_id, data=None):
+ headers = {
+ 'X-Api-Key': self._API_KEY,
+ }
+ if self._access_token:
+ headers['Authorization'] = 'Bearer ' + self._access_token
+ try:
+ return self._download_json(
+ 'https://api2.fox.com/v2.0/' + path,
+ video_id, data=data, headers=headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ entitlement_issues = self._parse_json(
+ e.cause.read().decode(), video_id)['entitlementIssues']
+ for issue in entitlement_issues:
+ if issue.get('errorCode') == 1005:
+ raise ExtractorError(
+ 'This video is only available via cable service provider '
+ 'subscription. You may want to use --cookies.', expected=True)
+ messages = ', '.join([issue['message'] for issue in entitlement_issues])
+ raise ExtractorError(messages, expected=True)
+ raise
+
+ def _real_initialize(self):
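+ # Prefer the access token from the mvpd-auth cookie (set after a TV
+ # provider login); otherwise fall back to an anonymous device login.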
+ if not self._access_token:
+ mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth')
+ if mvpd_auth:
+ self._access_token = (self._parse_json(compat_urllib_parse_unquote(
+ mvpd_auth.value), None, fatal=False) or {}).get('accessToken')
+ if not self._access_token:
+ self._access_token = self._call_api(
+ 'login', None, json.dumps({
+ 'deviceId': compat_str(uuid.uuid4()),
+ }).encode())['accessToken']
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._call_api('vodplayer/' + video_id, video_id)
+
+ title = video['name']
+ release_url = video['url']
+ try:
+ m3u8_url = self._download_json(release_url, video_id)['playURL']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ error = self._parse_json(e.cause.read().decode(), video_id)
+ if error.get('exception') == 'GeoLocationBlocked':
+ self.raise_geo_restricted(countries=['US'])
+ raise ExtractorError(error['description'], expected=True)
+ raise
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ data = try_get(
+ video, lambda x: x['trackingData']['properties'], dict) or {}
+
+ duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
+ video.get('duration')) or parse_duration(video.get('duration'))
+ timestamp = unified_timestamp(video.get('datePublished'))
+ creator = data.get('brand') or data.get('network') or video.get('network')
+ series = video.get('seriesName') or data.get(
+ 'seriesName') or data.get('show')
+
+ subtitles = {}
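+ # Only SCC closed-caption document releases are used; the first match
+ # is taken as English subtitles.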
+ for doc_rel in video.get('documentReleases', []):
+ rel_url = doc_rel.get('url')
+ if not rel_url or doc_rel.get('format') != 'SCC':
+ continue
+ subtitles['en'] = [{
+ 'url': rel_url,
+ 'ext': 'scc',
+ }]
+ break
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': video.get('description'),
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'age_limit': parse_age_limit(video.get('contentRating')),
+ 'creator': creator,
+ 'series': series,
+ 'season_number': int_or_none(video.get('seasonNumber')),
+ 'episode': video.get('name'),
+ 'episode_number': int_or_none(video.get('episodeNumber')),
+ 'release_year': int_or_none(video.get('releaseYear')),
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/fox9.py b/hypervideo_dl/extractor/fox9.py
new file mode 100644
index 0000000..91f8f7b
--- /dev/null
+++ b/hypervideo_dl/extractor/fox9.py
@@ -0,0 +1,41 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class FOX9IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fox9\.com/video/(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'anvato:anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b:' + video_id,
+ 'Anvato', video_id)
+
+
+class FOX9NewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fox9\.com/news/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://www.fox9.com/news/black-bear-in-tree-draws-crowd-in-downtown-duluth-minnesota',
+ 'md5': 'd6e1b2572c3bab8a849c9103615dd243',
+ 'info_dict': {
+ 'id': '314473',
+ 'ext': 'mp4',
+ 'title': 'Bear climbs tree in downtown Duluth',
+ 'description': 'md5:6a36bfb5073a411758a752455408ac90',
+ 'duration': 51,
+ 'timestamp': 1478123580,
+ 'upload_date': '20161102',
+ 'uploader': 'EPFOX',
+ 'categories': ['News', 'Sports'],
+ 'tags': ['news', 'video'],
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
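+ # News articles reference their clip by Anvato id; hand off to the
+ # FOX9 extractor, which wraps the Anvato platform key.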
+ anvato_id = self._search_regex(
+ r'anvatoId\s*:\s*[\'"](\d+)', webpage, 'anvato id')
+ return self.url_result('https://www.fox9.com/video/' + anvato_id, 'FOX9')
diff --git a/hypervideo_dl/extractor/foxgay.py b/hypervideo_dl/extractor/foxgay.py
new file mode 100644
index 0000000..512a106
--- /dev/null
+++ b/hypervideo_dl/extractor/foxgay.py
@@ -0,0 +1,63 @@
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+ get_element_by_id,
+ int_or_none,
+ remove_end,
+)
+
+
+class FoxgayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
+ _TEST = {
+ 'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
+ 'md5': '344558ccfea74d33b7adbce22e577f54',
+ 'info_dict': {
+ 'id': '2582',
+ 'ext': 'mp4',
+ 'title': 'Fuck Turkish-style',
+ 'description': 'md5:6ae2d9486921891efe89231ace13ffdf',
+ 'age_limit': 18,
+ 'thumbnail': r're:https?://.*\.jpg$',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = remove_end(self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title'), ' - Foxgay.com')
+ description = get_element_by_id('inf_tit', webpage)
+
+ # The default user-agent with foxgay cookies leads to pages without videos
+ self._downloader.cookiejar.clear('.foxgay.com')
+ # Find the URL for the iFrame which contains the actual video.
+ iframe_url = self._html_search_regex(
+ r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage,
+ 'video frame', group='url')
+ iframe = self._download_webpage(
+ iframe_url, video_id, headers={'User-Agent': 'curl/7.50.1'},
+ note='Downloading video frame')
+ video_data = self._parse_json(self._search_regex(
+ r'video_data\s*=\s*([^;]+);', iframe, 'video data'), video_id)
+
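+ # sources and resolutions are parallel arrays; when resolutions are
+ # absent, an infinite repeat of None keeps zip() aligned with sources.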
+ formats = [{
+ 'url': source,
+ 'height': int_or_none(resolution),
+ } for source, resolution in zip(
+ video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'thumbnail': video_data.get('act_vid', {}).get('thumb'),
+ 'age_limit': 18,
+ }
diff --git a/hypervideo_dl/extractor/foxnews.py b/hypervideo_dl/extractor/foxnews.py
new file mode 100644
index 0000000..63613cb
--- /dev/null
+++ b/hypervideo_dl/extractor/foxnews.py
@@ -0,0 +1,127 @@
+from __future__ import unicode_literals
+
+import re
+
+from .amp import AMPIE
+from .common import InfoExtractor
+
+
+class FoxNewsIE(AMPIE):
+ IE_NAME = 'foxnews'
+ IE_DESC = 'Fox News and Fox Business Video'
+ _VALID_URL = r'https?://(?P<host>video\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
+ 'md5': '32aaded6ba3ef0d1c04e238d01031e5e',
+ 'info_dict': {
+ 'id': '3937480',
+ 'ext': 'flv',
+ 'title': 'Frozen in Time',
+ 'description': '16-year-old girl is size of toddler',
+ 'duration': 265,
+ 'timestamp': 1304411491,
+ 'upload_date': '20110503',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips',
+ 'md5': '5846c64a1ea05ec78175421b8323e2df',
+ 'info_dict': {
+ 'id': '3922535568001',
+ 'ext': 'mp4',
+ 'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
+ 'description': "Congressman discusses president's plan",
+ 'duration': 292,
+ 'timestamp': 1417662047,
+ 'upload_date': '20141204',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://video.foxbusiness.com/v/4442309889001',
+ 'only_matching': True,
+ },
+ {
+ # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words
+ 'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true',
+ 'only_matching': True,
+ },
+ ]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ host, video_id = re.match(self._VALID_URL, url).groups()
+
+ info = self._extract_feed_info(
+ 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
+ info['id'] = video_id
+ return info
+
+
+class FoxNewsArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
+ IE_NAME = 'foxnews:article'
+
+ _TESTS = [{
+ # data-video-id
+ 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
+ 'md5': '83d44e1aff1433e7a29a7b537d1700b5',
+ 'info_dict': {
+ 'id': '5116295019001',
+ 'ext': 'mp4',
+ 'title': 'Trump and Clinton asked to defend positions on Iraq War',
+ 'description': 'Veterans react on \'The Kelly File\'',
+ 'timestamp': 1473301045,
+ 'upload_date': '20160908',
+ },
+ }, {
+ # iframe embed
+ 'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
+ 'info_dict': {
+ 'id': '5748266721001',
+ 'ext': 'flv',
+ 'title': 'Kyle Kashuv has a positive message for the Trump White House',
+ 'description': 'Marjory Stoneman Douglas student disagrees with classmates.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 229,
+ 'timestamp': 1520594670,
+ 'upload_date': '20180309',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._html_search_regex(
+ r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
+ webpage, 'video ID', group='id', default=None)
+ if video_id:
+ return self.url_result(
+ 'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())
+
+ return self.url_result(
+ FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())
diff --git a/hypervideo_dl/extractor/foxsports.py b/hypervideo_dl/extractor/foxsports.py
new file mode 100644
index 0000000..2b2cb6c
--- /dev/null
+++ b/hypervideo_dl/extractor/foxsports.py
@@ -0,0 +1,33 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class FoxSportsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.foxsports.com/tennessee/video/432609859715',
+ 'md5': 'b49050e955bebe32c301972e4012ac17',
+ 'info_dict': {
+ 'id': '432609859715',
+ 'ext': 'mp4',
+ 'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
+ 'description': 'Courtney Lee talks about Memphis being focused.',
+ # TODO: fix timestamp
+ 'upload_date': '19700101', # '20150423',
+ # 'timestamp': 1429761109,
+ 'uploader': 'NEWA-FNG-FOXSPORTS',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['ThePlatform'],
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ return self.url_result(
+ 'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed')
diff --git a/hypervideo_dl/extractor/franceculture.py b/hypervideo_dl/extractor/franceculture.py
new file mode 100644
index 0000000..14f4cb4
--- /dev/null
+++ b/hypervideo_dl/extractor/franceculture.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ int_or_none,
+)
+
+
+class FranceCultureIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
+ 'info_dict': {
+ 'id': 'rendez-vous-au-pays-des-geeks',
+ 'display_id': 'rendez-vous-au-pays-des-geeks',
+ 'ext': 'mp3',
+ 'title': 'Rendez-vous au pays des geeks',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20140301',
+ 'timestamp': 1393700400,
+ 'vcodec': 'none',
+ }
+ }, {
+ # no thumbnail
+ 'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
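+ # The audio URL lives in the data-url/data-asset-source attributes of
+ # the player button next to the title zone.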
+ video_data = extract_attributes(self._search_regex(
+ r'''(?sx)
+ (?:
+ </h1>|
+ <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
+ ).*?
+ (<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
+ ''',
+ webpage, 'video data'))
+
+ video_url = video_data.get('data-url') or video_data['data-asset-source']
+ title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
+
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
+ webpage, 'description', default=None)
+ thumbnail = self._search_regex(
+ r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
+ webpage, 'thumbnail', default=None)
+ uploader = self._html_search_regex(
+ r'(?s)<span class="author">(.*?)</span>',
+ webpage, 'uploader', default=None)
+ ext = determine_ext(video_url.lower())
+
+ return {
+ 'id': display_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'ext': ext,
+ 'vcodec': 'none' if ext == 'mp3' else None,
+ 'uploader': uploader,
+ 'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
+ 'duration': int_or_none(video_data.get('data-duration')),
+ }
diff --git a/hypervideo_dl/extractor/franceinter.py b/hypervideo_dl/extractor/franceinter.py
new file mode 100644
index 0000000..ae822a5
--- /dev/null
+++ b/hypervideo_dl/extractor/franceinter.py
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import month_by_name
+
+
+class FranceInterIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'
+
+ _TEST = {
+ 'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
+ 'md5': '9e54d7bdb6fdc02a841007f8a975c094',
+ 'info_dict': {
+ 'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
+ 'ext': 'mp3',
+ 'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
+ 'description': 'md5:401969c5d318c061f86bda1fa359292b',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20160907',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(
+ r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'video url', group='url')
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
+
+ upload_date_str = self._search_regex(
+ r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
+ webpage, 'upload date', fatal=False)
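+ # The broadcast date is rendered as e.g. '7 septembre 2016'; reverse
+ # it into YYYYMMDD using the French month-name table.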
+ if upload_date_str:
+ upload_date_list = upload_date_str.split()
+ upload_date_list.reverse()
+ upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0)
+ upload_date_list[2] = '%02d' % int(upload_date_list[2])
+ upload_date = ''.join(upload_date_list)
+ else:
+ upload_date = None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': [{
+ 'url': video_url,
+ 'vcodec': 'none',
+ }],
+ }
diff --git a/hypervideo_dl/extractor/francetv.py b/hypervideo_dl/extractor/francetv.py
new file mode 100644
index 0000000..e4ec2e2
--- /dev/null
+++ b/hypervideo_dl/extractor/francetv.py
@@ -0,0 +1,546 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ try_get,
+ url_or_none,
+ urljoin,
+)
+from .dailymotion import DailymotionIE
+
+
+class FranceTVBaseInfoExtractor(InfoExtractor):
+ def _make_url_result(self, video_or_full_id, catalog=None):
+ full_id = 'francetv:%s' % video_or_full_id
+ if '@' not in video_or_full_id and catalog:
+ full_id += '@%s' % catalog
+ return self.url_result(
+ full_id, ie=FranceTVIE.ie_key(),
+ video_id=video_or_full_id.split('@')[0])
+
+
+class FranceTVIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://
+ sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\?
+ .*?\bidDiffusion=[^&]+|
+ (?:
+ https?://videos\.francetv\.fr/video/|
+ francetv:
+ )
+ (?P<id>[^@]+)(?:@(?P<catalog>.+))?
+ )
+ '''
+
+ _TESTS = [{
+ # without catalog
+ 'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
+ 'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f',
+ 'info_dict': {
+ 'id': '162311093',
+ 'ext': 'mp4',
+ 'title': '13h15, le dimanche... - Les mystères de Jésus',
+ 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
+ 'timestamp': 1502623500,
+ 'upload_date': '20170813',
+ },
+ }, {
+ # with catalog
+ 'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videos.francetv.fr/video/NI_657393@Regions',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:162311093',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_1004933@Zouzous',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_983319@Info-web',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_983319',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_657393@Regions',
+ 'only_matching': True,
+ }, {
+ # france-3 live
+ 'url': 'francetv:SIM_France3',
+ 'only_matching': True,
+ }]
+
+ def _extract_video(self, video_id, catalogue=None):
+ # Videos are identified by idDiffusion, so the catalogue part is
+ # optional. However, when provided, some extra formats may be
+ # returned, so we pass it along if available.
+ info = self._download_json(
+ 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
+ video_id, 'Downloading video JSON', query={
+ 'idDiffusion': video_id,
+ 'catalogue': catalogue or '',
+ })
+
+ if info.get('status') == 'NOK':
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, info['message']),
+ expected=True)
+ allowed_countries = info['videos'][0].get('geoblocage')
+ if allowed_countries:
+ georestricted = True
+ geo_info = self._download_json(
+ 'http://geo.francetv.fr/ws/edgescape.json', video_id,
+ 'Downloading geo restriction info')
+ country = geo_info['reponse']['geo_info']['country_code']
+ if country not in allowed_countries:
+ raise ExtractorError(
+ 'The video is not available from your location',
+ expected=True)
+ else:
+ georestricted = False
+
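+ # Manifest URLs must be signed through francetv's token service; each
+ # known host is tried in turn, falling back to the unsigned URL.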
+ def sign(manifest_url, manifest_id):
+ for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
+ signed_url = url_or_none(self._download_webpage(
+ 'https://%s/esi/TA' % host, video_id,
+ 'Downloading signed %s manifest URL' % manifest_id,
+ fatal=False, query={
+ 'url': manifest_url,
+ }))
+ if signed_url:
+ return signed_url
+ return manifest_url
+
+ is_live = None
+
+ videos = []
+
+ for video in (info.get('videos') or []):
+ if video.get('statut') != 'ONLINE':
+ continue
+ if not video.get('url'):
+ continue
+ videos.append(video)
+
+ if not videos:
+ for device_type in ['desktop', 'mobile']:
+ fallback_info = self._download_json(
+ 'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
+ video_id, 'Downloading fallback %s video JSON' % device_type, query={
+ 'device_type': device_type,
+ 'browser': 'chrome',
+ }, fatal=False)
+
+ if fallback_info and fallback_info.get('video'):
+ videos.append(fallback_info['video'])
+
+ formats = []
+ for video in videos:
+ video_url = video.get('url')
+ if not video_url:
+ continue
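+ # Liveness is inferred from the first usable video: an explicit flag,
+ # an open 'direct' plage, or a live.francetv.fr URL.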
+ if is_live is None:
+ is_live = (try_get(
+ video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True
+ or video.get('is_live') is True
+ or '/live.francetv.fr/' in video_url)
+ format_id = video.get('format')
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ if georestricted:
+ # See https://github.com/ytdl-org/youtube-dl/issues/3963
+ # m3u8 urls work fine
+ continue
+ formats.extend(self._extract_f4m_formats(
+ sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
+ video_id, f4m_id=format_id, fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ sign(video_url, format_id), video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id=format_id,
+ fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
+ elif video_url.startswith('rtmp'):
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'rtmp-%s' % format_id,
+ 'ext': 'flv',
+ })
+ else:
+ if self._is_valid_url(video_url, video_id, format_id):
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ })
+
+ self._sort_formats(formats)
+
+ title = info['titre']
+ subtitle = info.get('sous_titre')
+ if subtitle:
+ title += ' - %s' % subtitle
+ title = title.strip()
+
+ subtitles = {}
+ subtitles_list = [{
+ 'url': subformat['url'],
+ 'ext': subformat.get('format'),
+ } for subformat in info.get('subtitles', []) if subformat.get('url')]
+ if subtitles_list:
+ subtitles['fr'] = subtitles_list
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': clean_html(info.get('synopsis')),
+ 'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
+ 'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
+ 'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
+ 'is_live': is_live,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ catalog = mobj.group('catalog')
+
+ if not video_id:
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = qs.get('idDiffusion', [None])[0]
+ catalog = qs.get('catalogue', [None])[0]
+ if not video_id:
+ raise ExtractorError('Invalid URL', expected=True)
+
+ return self._extract_video(video_id, catalog)
+
+
+class FranceTVSiteIE(FranceTVBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+
+ _TESTS = [{
+ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
+ 'info_dict': {
+ 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
+ 'ext': 'mp4',
+ 'title': '13h15, le dimanche... - Les mystères de Jésus',
+ 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
+ 'timestamp': 1502623500,
+ 'upload_date': '20170813',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }, {
+ # france3
+ 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
+ 'only_matching': True,
+ }, {
+ # france4
+ 'url': 'https://www.france.tv/france-4/hero-corp/saison-1/134151-apres-le-calme.html',
+ 'only_matching': True,
+ }, {
+ # france5
+ 'url': 'https://www.france.tv/france-5/c-a-dire/saison-10/137013-c-a-dire.html',
+ 'only_matching': True,
+ }, {
+ # franceo
+ 'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html',
+ 'only_matching': True,
+ }, {
+ # france2 live
+ 'url': 'https://www.france.tv/france-2/direct.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.france.tv/jeux-et-divertissements/divertissements/133965-le-web-contre-attaque.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.france.tv/142749-rouge-sang.html',
+ 'only_matching': True,
+ }, {
+ # france-3 live
+ 'url': 'https://www.france.tv/france-3/direct.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ catalogue = None
+ video_id = self._search_regex(
+ r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'video id', default=None, group='id')
+
+ if not video_id:
+ video_id, catalogue = self._html_search_regex(
+ r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
+ webpage, 'video ID').split('@')
+
+ return self._make_url_result(video_id, catalogue)
+
+
+class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
+ _VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'
+
+ _TESTS = [{
+ 'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
+ 'info_dict': {
+ 'id': 'NI_983319',
+ 'ext': 'mp4',
+ 'title': 'Le Pen Reims',
+ 'upload_date': '20170505',
+ 'timestamp': 1493981780,
+ 'duration': 16,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
+ video_id)
+
+ return self._make_url_result(video['video_id'], video.get('catalog'))
+
+
+class FranceTVInfoIE(FranceTVBaseInfoExtractor):
+ IE_NAME = 'francetvinfo.fr'
+ _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
+ 'info_dict': {
+ 'id': '84981923',
+ 'ext': 'mp4',
+ 'title': 'Soir 3',
+ 'upload_date': '20130826',
+ 'timestamp': 1377548400,
+ 'subtitles': {
+ 'fr': 'mincount:2',
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }, {
+ 'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
+ 'only_matching': True,
+ }, {
+ # Dailymotion embed
+ 'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
+ 'md5': 'ee7f1828f25a648addc90cb2687b1f12',
+ 'info_dict': {
+ 'id': 'x4iiko0',
+ 'ext': 'mp4',
+ 'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
+ 'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016',
+ 'timestamp': 1467011958,
+ 'upload_date': '20160627',
+ 'uploader': 'France Inter',
+ 'uploader_id': 'x2q2ez',
+ },
+ 'add_ie': ['Dailymotion'],
+ }, {
+ 'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
+ 'only_matching': True,
+ }, {
+ # "<figure id=" pattern (#28792)
+ 'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ dailymotion_urls = DailymotionIE._extract_urls(webpage)
+ if dailymotion_urls:
+ return self.playlist_result([
+ self.url_result(dailymotion_url, DailymotionIE.ie_key())
+ for dailymotion_url in dailymotion_urls])
+
+ video_id = self._search_regex(
+ (r'player\.load[^;]+src:\s*["\']([^"\']+)',
+ r'id-video=([^@]+@[^"]+)',
+ r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
+ r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
+ webpage, 'video id')
+
+ return self._make_url_result(video_id)
+
+
+class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):
+ IE_NAME = 'sport.francetvinfo.fr'
+ _VALID_URL = r'https?://sport\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018',
+ 'info_dict': {
+ 'id': '6e49080e-3f45-11e8-b459-000d3a2439ea',
+ 'ext': 'mp4',
+ 'title': 'Retour sur les meilleurs moments de Pyeongchang 2018',
+ 'timestamp': 1523639962,
+ 'upload_date': '20180413',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id')
+ return self._make_url_result(video_id, 'Sport-web')
+
+
+class GenerationWhatIE(InfoExtractor):
+ IE_NAME = 'france2.fr:generation-what'
+ _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'http://generation-what.francetv.fr/portrait/video/present-arms',
+ 'info_dict': {
+ 'id': 'wtvKYUG45iw',
+ 'ext': 'mp4',
+ 'title': 'Generation What - Garde à vous - FRA',
+ 'uploader': 'Generation What',
+ 'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w',
+ 'upload_date': '20160411',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Youtube'],
+ }, {
+ 'url': 'http://generation-what.francetv.fr/europe/video/present-arms',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ youtube_id = self._search_regex(
+ r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';",
+ webpage, 'youtube id')
+
+ return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id)
+
+
+class CultureboxIE(FranceTVBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689',
+ 'info_dict': {
+ 'id': 'EV_134885',
+ 'ext': 'mp4',
+ 'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7',
+ 'description': 'md5:19c44af004b88219f4daa50fa9a351d4',
+ 'upload_date': '20180206',
+ 'timestamp': 1517945220,
+ 'duration': 5981,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ if ">Ce live n'est plus disponible en replay<" in webpage:
+ raise ExtractorError(
+ 'Video %s is not available' % display_id, expected=True)
+
+ video_id, catalogue = self._search_regex(
+ r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]',
+ webpage, 'video id').split('@')
+
+ return self._make_url_result(video_id, catalogue)
+
+
+class FranceTVJeunesseIE(FranceTVBaseInfoExtractor):
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?(?:zouzous|ludo)\.fr/heros/(?P<id>[^/?#&]+))'
+
+ _TESTS = [{
+ 'url': 'https://www.zouzous.fr/heros/simon',
+ 'info_dict': {
+ 'id': 'simon',
+ },
+ 'playlist_count': 9,
+ }, {
+ 'url': 'https://www.ludo.fr/heros/ninjago',
+ 'info_dict': {
+ 'id': 'ninjago',
+ },
+ 'playlist_count': 10,
+ }, {
+ 'url': 'https://www.zouzous.fr/heros/simon?abc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ playlist_id = mobj.group('id')
+
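+ # Each hero page exposes its episodes as JSON under <page-url>/playlist;
+ # a missing or zero 'count' appears to mean no published videos.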
+ playlist = self._download_json(
+ '%s/%s' % (mobj.group('url'), 'playlist'), playlist_id)
+
+ if not playlist.get('count'):
+ raise ExtractorError(
+ '%s is not available' % playlist_id, expected=True)
+
+ entries = []
+ for item in playlist['items']:
+ identity = item.get('identity')
+ if identity and isinstance(identity, compat_str):
+ entries.append(self._make_url_result(identity))
+
+ return self.playlist_result(entries, playlist_id)
diff --git a/hypervideo_dl/extractor/freesound.py b/hypervideo_dl/extractor/freesound.py
new file mode 100644
index 0000000..138b6bc
--- /dev/null
+++ b/hypervideo_dl/extractor/freesound.py
@@ -0,0 +1,79 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ get_element_by_class,
+ get_element_by_id,
+ unified_strdate,
+)
+
+
+class FreesoundIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?freesound\.org/people/[^/]+/sounds/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://www.freesound.org/people/miklovan/sounds/194503/',
+ 'md5': '12280ceb42c81f19a515c745eae07650',
+ 'info_dict': {
+ 'id': '194503',
+ 'ext': 'mp3',
+ 'title': 'gulls in the city.wav',
+ 'description': 'the sounds of seagulls in the city',
+ 'duration': 130.233,
+ 'uploader': 'miklovan',
+ 'upload_date': '20130715',
+ 'tags': list,
+ }
+ }
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, audio_id)
+
+ audio_url = self._og_search_property('audio', webpage, 'song url')
+ title = self._og_search_property('audio:title', webpage, 'song title')
+
+ description = self._html_search_regex(
+ r'(?s)id=["\']sound_description["\'][^>]*>(.+?)</div>',
+ webpage, 'description', fatal=False)
+
+ duration = float_or_none(
+ get_element_by_class('duration', webpage), scale=1000)
+
+ upload_date = unified_strdate(get_element_by_id('sound_date', webpage))
+ uploader = self._og_search_property(
+ 'audio:artist', webpage, 'uploader', fatal=False)
+
+ channels = self._html_search_regex(
+ r'Channels</dt><dd>(.+?)</dd>', webpage,
+ 'channels info', fatal=False)
+
+ tags_str = get_element_by_class('tags', webpage)
+ tags = re.findall(r'<a[^>]+>([^<]+)', tags_str) if tags_str else None
+
+ audio_urls = [audio_url]
+
+ LQ_FORMAT = '-lq.mp3'
+ if LQ_FORMAT in audio_url:
+ audio_urls.append(audio_url.replace(LQ_FORMAT, '-hq.mp3'))
+
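+ # audio_urls is ordered low to high quality, so enumerate() assigns
+ # quality 0 to the -lq.mp3 preview and 1 to the -hq.mp3 variant when
+ # both are available.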
+ formats = [{
+ 'url': format_url,
+ 'format_note': channels,
+ 'quality': quality,
+ } for quality, format_url in enumerate(audio_urls)]
+ self._sort_formats(formats)
+
+ return {
+ 'id': audio_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'tags': tags,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/freespeech.py b/hypervideo_dl/extractor/freespeech.py
new file mode 100644
index 0000000..ea9c3e3
--- /dev/null
+++ b/hypervideo_dl/extractor/freespeech.py
@@ -0,0 +1,31 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+
+
+class FreespeechIE(InfoExtractor):
+ IE_NAME = 'freespeech.org'
+ _VALID_URL = r'https?://(?:www\.)?freespeech\.org/stories/(?P<id>.+)'
+ _TEST = {
+ 'add_ie': ['Youtube'],
+ 'url': 'http://www.freespeech.org/stories/fcc-announces-net-neutrality-rollback-whats-stake/',
+ 'info_dict': {
+ 'id': 'waRk6IPqyWM',
+ 'ext': 'mp4',
+ 'title': 'What\'s At Stake - Net Neutrality Special',
+ 'description': 'Presented by MNN and FSTV',
+ 'upload_date': '20170728',
+ 'uploader_id': 'freespeechtv',
+ 'uploader': 'freespeechtv',
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ youtube_url = self._search_regex(
+ r'data-video-url="([^"]+)"',
+ webpage, 'youtube url')
+
+ return self.url_result(youtube_url, YoutubeIE.ie_key())
diff --git a/hypervideo_dl/extractor/freshlive.py b/hypervideo_dl/extractor/freshlive.py
new file mode 100644
index 0000000..72a8459
--- /dev/null
+++ b/hypervideo_dl/extractor/freshlive.py
@@ -0,0 +1,83 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class FreshLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://freshlive\.tv/[^/]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://freshlive.tv/satotv/74712',
+ 'md5': '9f0cf5516979c4454ce982df3d97f352',
+ 'info_dict': {
+ 'id': '74712',
+ 'ext': 'mp4',
+ 'title': 'テスト',
+ 'description': 'テスト',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1511,
+ 'timestamp': 1483619655,
+ 'upload_date': '20170105',
+ 'uploader': 'サトTV',
+ 'uploader_id': 'satotv',
+ 'view_count': int,
+ 'comment_count': int,
+ 'is_live': False,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
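+ # The page embeds its app state as window.__CONTEXT__; program
+ # metadata lives in the ProgramStore, keyed by video id.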
+ options = self._parse_json(
+ self._search_regex(
+ r'window\.__CONTEXT__\s*=\s*({.+?});\s*</script>',
+ webpage, 'initial context'),
+ video_id)
+
+ info = options['context']['dispatcher']['stores']['ProgramStore']['programs'][video_id]
+
+ title = info['title']
+
+ if info.get('status') == 'upcoming':
+ raise ExtractorError('Stream %s is upcoming' % video_id, expected=True)
+
+ stream_url = info.get('liveStreamUrl') or info['archiveStreamUrl']
+
+ is_live = info.get('liveStreamUrl') is not None
+
+ formats = self._extract_m3u8_formats(
+ stream_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls')
+
+ if is_live:
+ title = self._live_title(title)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': info.get('description'),
+ 'thumbnail': info.get('thumbnailUrl'),
+ 'duration': int_or_none(info.get('airTime')),
+ 'timestamp': unified_timestamp(info.get('createdAt')),
+ 'uploader': try_get(
+ info, lambda x: x['channel']['title'], compat_str),
+ 'uploader_id': try_get(
+ info, lambda x: x['channel']['code'], compat_str),
+ 'uploader_url': try_get(
+ info, lambda x: x['channel']['permalink'], compat_str),
+ 'view_count': int_or_none(info.get('viewCount')),
+ 'comment_count': int_or_none(info.get('commentCount')),
+ 'tags': info.get('tags', []),
+ 'is_live': is_live,
+ }
diff --git a/hypervideo_dl/extractor/frontendmasters.py b/hypervideo_dl/extractor/frontendmasters.py
new file mode 100644
index 0000000..f1db33f
--- /dev/null
+++ b/hypervideo_dl/extractor/frontendmasters.py
@@ -0,0 +1,263 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ parse_duration,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class FrontendMastersBaseIE(InfoExtractor):
+ _API_BASE = 'https://api.frontendmasters.com/v1/kabuki'
+ _LOGIN_URL = 'https://frontendmasters.com/login/'
+
+ _NETRC_MACHINE = 'frontendmasters'
+
+ _QUALITIES = {
+ 'low': {'width': 480, 'height': 360},
+ 'mid': {'width': 1280, 'height': 720},
+ 'high': {'width': 1920, 'height': 1080}
+ }
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'username': username,
+ 'password': password
+ })
+
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+ 'post_url', default=self._LOGIN_URL, group='url')
+
+ if not post_url.startswith('http'):
+ post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
+ response = self._download_webpage(
+ post_url, None, 'Logging in', data=urlencode_postdata(login_form),
+ headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+ # Successful login
+ if any(p in response for p in (
+ 'wp-login.php?action=logout', '>Logout')):
+ return
+
+ error = self._html_search_regex(
+ r'class=(["\'])(?:(?!\1).)*\bMessageAlert\b(?:(?!\1).)*\1[^>]*>(?P<error>[^<]+)<',
+ response, 'error message', default=None, group='error')
+ if error:
+ raise ExtractorError('Unable to log in: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
+ def _download_course(self, course_name, url):
+ return self._download_json(
+ '%s/courses/%s' % (self._API_BASE, course_name), course_name,
+ 'Downloading course JSON', headers={'Referer': url})
+
+ @staticmethod
+ def _extract_chapters(course):
+ chapters = []
+ lesson_elements = course.get('lessonElements')
+ if isinstance(lesson_elements, list):
+ chapters = [url_or_none(e) for e in lesson_elements if url_or_none(e)]
+ return chapters
+
+ @staticmethod
+ def _extract_lesson(chapters, lesson_id, lesson):
+ title = lesson.get('title') or lesson_id
+ display_id = lesson.get('slug')
+ description = lesson.get('description')
+ thumbnail = lesson.get('thumbnail')
+
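+ # 'index' appears to count lessons only, while 'elementIndex' also
+ # counts the chapter markers preceding them, so their difference gives
+ # the 1-based chapter number.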
+ chapter_number = None
+ index = lesson.get('index')
+ element_index = lesson.get('elementIndex')
+ if (isinstance(index, int) and isinstance(element_index, int)
+ and index < element_index):
+ chapter_number = element_index - index
+ chapter = (chapters[chapter_number - 1]
+ if chapter_number - 1 < len(chapters) else None)
+
+ duration = None
+ timestamp = lesson.get('timestamp')
+ if isinstance(timestamp, compat_str):
+ mobj = re.search(
+ r'(?P<start>\d{1,2}:\d{1,2}:\d{1,2})\s*-(?P<end>\s*\d{1,2}:\d{1,2}:\d{1,2})',
+ timestamp)
+ if mobj:
+ duration = parse_duration(mobj.group('end')) - parse_duration(
+ mobj.group('start'))
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'frontendmasters:%s' % lesson_id,
+ 'ie_key': FrontendMastersIE.ie_key(),
+ 'id': lesson_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'chapter': chapter,
+ 'chapter_number': chapter_number,
+ }
+
+
+class FrontendMastersIE(FrontendMastersBaseIE):
+ _VALID_URL = r'(?:frontendmasters:|https?://api\.frontendmasters\.com/v\d+/kabuki/video/)(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://api.frontendmasters.com/v1/kabuki/video/a2qogef6ba',
+ 'md5': '7f161159710d6b7016a4f4af6fcb05e2',
+ 'info_dict': {
+ 'id': 'a2qogef6ba',
+ 'ext': 'mp4',
+ 'title': 'a2qogef6ba',
+ },
+ 'skip': 'Requires FrontendMasters account credentials',
+ }, {
+ 'url': 'frontendmasters:a2qogef6ba',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ lesson_id = self._match_id(url)
+
+ source_url = '%s/video/%s/source' % (self._API_BASE, lesson_id)
+
+ formats = []
+ for ext in ('webm', 'mp4'):
+ for quality in ('low', 'mid', 'high'):
+ resolution = self._QUALITIES[quality].copy()
+ format_id = '%s-%s' % (ext, quality)
+ # fatal=False makes _download_json return None on failure, so guard
+ # with .get() instead of crashing on a TypeError while indexing
+ source = self._download_json(
+ source_url, lesson_id,
+ 'Downloading %s source JSON' % format_id, query={
+ 'f': ext,
+ 'r': resolution['height'],
+ }, headers={
+ 'Referer': url,
+ }, fatal=False) or {}
+ format_url = source.get('url')
+
+ if not format_url:
+ continue
+
+ f = resolution.copy()
+ f.update({
+ 'url': format_url,
+ 'ext': ext,
+ 'format_id': format_id,
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ subtitles = {
+ 'en': [{
+ 'url': '%s/transcripts/%s.vtt' % (self._API_BASE, lesson_id),
+ }]
+ }
+
+ return {
+ 'id': lesson_id,
+ 'title': lesson_id,
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
+
+
+class FrontendMastersLessonIE(FrontendMastersPageBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<course_name>[^/]+)/(?P<lesson_name>[^/]+)'
+ _TEST = {
+ 'url': 'https://frontendmasters.com/courses/web-development/tools',
+ 'info_dict': {
+ 'id': 'a2qogef6ba',
+ 'display_id': 'tools',
+ 'ext': 'mp4',
+ 'title': 'Tools',
+ 'description': 'md5:82c1ea6472e88ed5acd1829fe992e4f7',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'chapter': 'Introduction',
+ 'chapter_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires FrontendMasters account credentials',
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ course_name, lesson_name = mobj.group('course_name', 'lesson_name')
+
+ course = self._download_course(course_name, url)
+
+ lesson_id, lesson = next(
+ (video_id, data)
+ for video_id, data in course['lessonData'].items()
+ if data.get('slug') == lesson_name)
+
+ chapters = self._extract_chapters(course)
+ return self._extract_lesson(chapters, lesson_id, lesson)
+
+
+class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'https://frontendmasters.com/courses/web-development/',
+ 'info_dict': {
+ 'id': 'web-development',
+ 'title': 'Introduction to Web Development',
+ 'description': 'md5:9317e6e842098bf725d62360e52d49a6',
+ },
+ 'playlist_count': 81,
+ 'skip': 'Requires FrontendMasters account credentials',
+ }
+
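+ # A lesson URL also matches this broader course pattern, so defer to
+ # FrontendMastersLessonIE whenever it claims the URL.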
+ @classmethod
+ def suitable(cls, url):
+ return False if FrontendMastersLessonIE.suitable(url) else super(
+ FrontendMastersBaseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ course_name = self._match_id(url)
+
+ course = self._download_course(course_name, url)
+
+ chapters = self._extract_chapters(course)
+
+ lessons = sorted(
+ course['lessonData'].values(), key=lambda data: data['index'])
+
+ entries = []
+ for lesson in lessons:
+ lesson_name = lesson.get('slug')
+ if not lesson_name:
+ continue
+ lesson_id = lesson.get('hash') or lesson.get('statsId')
+ entries.append(self._extract_lesson(chapters, lesson_id, lesson))
+
+ title = course.get('title')
+ description = course.get('description')
+
+ return self.playlist_result(entries, course_name, title, description)
diff --git a/hypervideo_dl/extractor/fujitv.py b/hypervideo_dl/extractor/fujitv.py
new file mode 100644
index 0000000..a02a943
--- /dev/null
+++ b/hypervideo_dl/extractor/fujitv.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class FujiTVFODPlus7IE(InfoExtractor):
+ _VALID_URL = r'https?://i\.fod\.fujitv\.co\.jp/plus7/web/[0-9a-z]{4}/(?P<id>[0-9a-z]+)'
+ _BASE_URL = 'http://i.fod.fujitv.co.jp/'
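+ # The HLS playlists advertise only bitrates; map the known tbr values
+ # (kbps) to their corresponding frame sizes. Unknown bitrates are left
+ # without dimensions.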
+ _BITRATE_MAP = {
+ 300: (320, 180),
+ 800: (640, 360),
+ 1200: (1280, 720),
+ 2000: (1280, 720),
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ formats = self._extract_m3u8_formats(
+ self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id, 'mp4')
+ for f in formats:
+ wh = self._BITRATE_MAP.get(f.get('tbr'))
+ if wh:
+ f.update({
+ 'width': wh[0],
+ 'height': wh[1],
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'formats': formats,
+ 'thumbnail': self._BASE_URL + 'pc/image/wbtn/wbtn_%s.jpg' % video_id,
+ }
diff --git a/hypervideo_dl/extractor/funimation.py b/hypervideo_dl/extractor/funimation.py
new file mode 100644
index 0000000..d8f1e16
--- /dev/null
+++ b/hypervideo_dl/extractor/funimation.py
@@ -0,0 +1,158 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import string
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ js_to_json,
+ ExtractorError,
+ urlencode_postdata
+)
+
+
+class FunimationIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)'
+
+ _NETRC_MACHINE = 'funimation'
+ _TOKEN = None
+
+ _TESTS = [{
+ 'url': 'https://www.funimation.com/shows/hacksign/role-play/',
+ 'info_dict': {
+ 'id': '91144',
+ 'display_id': 'role-play',
+ 'ext': 'mp4',
+ 'title': '.hack//SIGN - Role Play',
+ 'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
+ 'thumbnail': r're:https?://.*\.jpg',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
+ 'info_dict': {
+ 'id': '210051',
+ 'display_id': 'broadcast-dub-preview',
+ 'ext': 'mp4',
+ 'title': 'Attack on Titan: Junior High - Broadcast Dub Preview',
+ 'thumbnail': r're:https?://.*\.(?:jpg|png)',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
+ 'only_matching': True,
+ }, {
+ # with lang code
+ 'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
+ 'only_matching': True,
+ }]
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+ try:
+ data = self._download_json(
+ 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
+ None, 'Logging in', data=urlencode_postdata({
+ 'username': username,
+ 'password': password,
+ }))
+ self._TOKEN = data['token']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read().decode(), None)['error']
+ raise ExtractorError(error, expected=True)
+ raise
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
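+ # KANE_customdimensions is the site's analytics object; it often
+ # carries metadata (video id, title, show, season) that is simpler to
+ # scrape than the page markup.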
+ def _search_kane(name):
+ return self._search_regex(
+ r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name,
+ webpage, name, default=None)
+
+ title_data = self._parse_json(self._search_regex(
+ r'TITLE_DATA\s*=\s*({[^}]+})',
+ webpage, 'title data', default=''),
+ display_id, js_to_json, fatal=False) or {}
+
+ video_id = title_data.get('id') or self._search_regex([
+ r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
+ r'<iframe[^>]+src="/player/(\d+)',
+ ], webpage, 'video_id', default=None)
+ if not video_id:
+ player_url = self._html_search_meta([
+ 'al:web:url',
+ 'og:video:url',
+ 'og:video:secure_url',
+ ], webpage, fatal=True)
+ video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id')
+
+ title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage)
+ series = _search_kane('showName')
+ if series:
+ title = '%s - %s' % (series, title)
+ description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
+
+ try:
+ headers = {}
+ if self._TOKEN:
+ headers['Authorization'] = 'Token %s' % self._TOKEN
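+ # The showexperience endpoint expects a random 8-character
+ # alphanumeric 'pinst_id' (player instance id); any value of that
+ # shape appears to be accepted.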
+ sources = self._download_json(
+ 'https://www.funimation.com/api/showexperience/%s/' % video_id,
+ video_id, headers=headers, query={
+ 'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]),
+ })['items']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ error = self._parse_json(e.cause.read(), video_id)['errors'][0]
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, error.get('detail') or error.get('title')), expected=True)
+ raise
+
+ formats = []
+ for source in sources:
+ source_url = source.get('src')
+ if not source_url:
+ continue
+ source_type = source.get('videoType') or determine_ext(source_url)
+ if source_type == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, video_id, 'mp4',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'format_id': source_type,
+ 'url': source_url,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'series': series,
+ 'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
+ 'episode_number': int_or_none(title_data.get('episodeNum')),
+ 'episode': episode,
+ 'season_id': title_data.get('seriesId'),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/funk.py b/hypervideo_dl/extractor/funk.py
new file mode 100644
index 0000000..81d1949
--- /dev/null
+++ b/hypervideo_dl/extractor/funk.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .nexx import NexxIE
+from ..utils import (
+ int_or_none,
+ str_or_none,
+)
+
+
+class FunkIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
+ 'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81',
+ 'info_dict': {
+ 'id': '1155821',
+ 'ext': 'mp4',
+ 'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
+ 'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
+ 'timestamp': 1514507395,
+ 'upload_date': '20171229',
+ },
+ }, {
+ 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id, nexx_id = re.match(self._VALID_URL, url).groups()
+ video = self._download_json(
+ 'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id)
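+ # Delegate playback to the Nexx extractor (domain 741) via
+ # url_transparent, keeping funk's own metadata where available.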
+ return {
+ '_type': 'url_transparent',
+ 'url': 'nexx:741:' + nexx_id,
+ 'ie_key': NexxIE.ie_key(),
+ 'id': nexx_id,
+ 'title': video.get('title'),
+ 'description': video.get('description'),
+ 'duration': int_or_none(video.get('duration')),
+ 'channel_id': str_or_none(video.get('channelId')),
+ 'display_id': display_id,
+ 'tags': video.get('tags'),
+ 'thumbnail': video.get('imageUrlLandscape'),
+ }
diff --git a/hypervideo_dl/extractor/fusion.py b/hypervideo_dl/extractor/fusion.py
new file mode 100644
index 0000000..a3f44b8
--- /dev/null
+++ b/hypervideo_dl/extractor/fusion.py
@@ -0,0 +1,84 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ mimetype2ext,
+ parse_iso8601,
+)
+
+
+class FusionIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/(?:video/|show/.+?\bvideo=)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
+ 'info_dict': {
+ 'id': '3145868',
+ 'ext': 'mp4',
+ 'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',
+ 'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',
+ 'duration': 140.0,
+ 'timestamp': 1442589635,
+ 'uploader': 'UNIVISON',
+ 'upload_date': '20150918',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Anvato'],
+ }, {
+ 'url': 'http://fusion.tv/video/201781',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://fusion.tv/show/food-exposed-with-nelufar-hedayat/?ancla=full-episodes&video=588644',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video = self._download_json(
+ 'https://platform.fusion.net/wp-json/fusiondotnet/v1/video/' + video_id, video_id)
+
+ info = {
+ 'id': video_id,
+ 'title': video['title'],
+ 'description': video.get('excerpt'),
+ 'timestamp': parse_iso8601(video.get('published')),
+ 'series': video.get('show'),
+ }
+
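+ # 'src' maps a format family (e.g. 'm3u8-variant') to its quality
+ # variants, each carrying its own URL and dimensions.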
+ formats = []
+ src = video.get('src') or {}
+ for f_id, f in src.items():
+ for q_id, q in f.items():
+ q_url = q.get('url')
+ if not q_url:
+ continue
+ ext = determine_ext(q_url, mimetype2ext(q.get('type')))
+ if ext == 'smil':
+ formats.extend(self._extract_smil_formats(q_url, video_id, fatal=False))
+ elif f_id == 'm3u8-variant' or (ext == 'm3u8' and q_id == 'Variant'):
+ formats.extend(self._extract_m3u8_formats(
+ q_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'format_id': '-'.join([f_id, q_id]),
+ 'url': q_url,
+ 'width': int_or_none(q.get('width')),
+ 'height': int_or_none(q.get('height')),
+ 'tbr': int_or_none(self._search_regex(r'_(\d+)\.m(?:p4|3u8)', q_url, 'bitrate', default=None)),
+ 'ext': 'mp4' if ext == 'm3u8' else ext,
+ 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
+ })
+ if formats:
+ self._sort_formats(formats)
+ info['formats'] = formats
+ else:
+ info.update({
+ '_type': 'url',
+ 'url': 'anvato:uni:' + video['video_ids']['anvato'],
+ 'ie_key': 'Anvato',
+ })
+
+ return info
diff --git a/hypervideo_dl/extractor/gaia.py b/hypervideo_dl/extractor/gaia.py
new file mode 100644
index 0000000..e952775
--- /dev/null
+++ b/hypervideo_dl/extractor/gaia.py
@@ -0,0 +1,130 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ strip_or_none,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class GaiaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P<id>[^/?]+).*?\bfullplayer=(?P<type>feature|preview)'
+ _TESTS = [{
+ 'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature',
+ 'info_dict': {
+ 'id': '89356',
+ 'ext': 'mp4',
+ 'title': 'Connecting with Universal Consciousness',
+ 'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
+ 'upload_date': '20151116',
+ 'timestamp': 1447707266,
+ 'duration': 936,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview',
+ 'info_dict': {
+ 'id': '89351',
+ 'ext': 'mp4',
+ 'title': 'Connecting with Universal Consciousness',
+ 'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
+ 'upload_date': '20151116',
+ 'timestamp': 1447707266,
+ 'duration': 53,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+ _NETRC_MACHINE = 'gaia'
+ _jwt = None
+
+ def _real_initialize(self):
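+ # Prefer an existing 'auth' cookie (e.g. loaded via --cookies) and
+ # only perform a credential login when no usable cookie is present.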
+ auth = self._get_cookies('https://www.gaia.com/').get('auth')
+ if auth:
+ auth = self._parse_json(
+ compat_urllib_parse_unquote(auth.value),
+ None, fatal=False)
+ if not auth:
+ username, password = self._get_login_info()
+ if username is None:
+ return
+ auth = self._download_json(
+ 'https://auth.gaia.com/v1/login',
+ None, data=urlencode_postdata({
+ 'username': username,
+ 'password': password
+ }))
+ if auth.get('success') is False:
+ raise ExtractorError(', '.join(auth['messages']), expected=True)
+ if auth:
+ self._jwt = auth.get('jwt')
+
+ def _real_extract(self, url):
+ display_id, vtype = re.search(self._VALID_URL, url).groups()
+ node_id = self._download_json(
+ 'https://brooklyn.gaia.com/pathinfo', display_id, query={
+ 'path': 'video/' + display_id,
+ })['id']
+ node = self._download_json(
+ 'https://brooklyn.gaia.com/node/%d' % node_id, node_id)
+ vdata = node[vtype]
+ media_id = compat_str(vdata['nid'])
+ title = node['title']
+
+ headers = None
+ if self._jwt:
+ headers = {'Authorization': 'Bearer ' + self._jwt}
+ media = self._download_json(
+ 'https://brooklyn.gaia.com/media/' + media_id,
+ media_id, headers=headers)
+ formats = self._extract_m3u8_formats(
+ media['mediaUrls']['bcHLS'], media_id, 'mp4')
+ self._sort_formats(formats)
+
+ subtitles = {}
+ text_tracks = media.get('textTracks', {})
+ for key in ('captions', 'subtitles'):
+ for lang, sub_url in text_tracks.get(key, {}).items():
+ subtitles.setdefault(lang, []).append({
+ 'url': sub_url,
+ })
+
+ fivestar = node.get('fivestar', {})
+ fields = node.get('fields', {})
+
+ def get_field_value(key, value_key='value'):
+ return try_get(fields, lambda x: x[key][0][value_key])
+
+ return {
+ 'id': media_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': strip_or_none(get_field_value('body') or get_field_value('teaser')),
+ 'timestamp': int_or_none(node.get('created')),
+ 'subtitles': subtitles,
+ 'duration': int_or_none(vdata.get('duration')),
+ 'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])),
+ 'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])),
+ 'comment_count': int_or_none(node.get('comment_count')),
+ 'series': try_get(node, lambda x: x['series']['title'], compat_str),
+ 'season_number': int_or_none(get_field_value('season')),
+ 'season_id': str_or_none(get_field_value('series_nid', 'nid')),
+ 'episode_number': int_or_none(get_field_value('episode')),
+ }
diff --git a/hypervideo_dl/extractor/gameinformer.py b/hypervideo_dl/extractor/gameinformer.py
new file mode 100644
index 0000000..f1b96c1
--- /dev/null
+++ b/hypervideo_dl/extractor/gameinformer.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ get_element_by_class,
+ get_element_by_id,
+)
+
+
+class GameInformerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>[^.?&#]+)'
+ _TESTS = [{
+ # normal Brightcove embed code extracted with BrightcoveNewIE._extract_url
+ 'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
+ 'md5': '292f26da1ab4beb4c9099f1304d2b071',
+ 'info_dict': {
+ 'id': '4515472681001',
+ 'ext': 'mp4',
+ 'title': 'Replay - Animal Crossing',
+ 'description': 'md5:2e211891b215c85d061adc7a4dd2d930',
+ 'timestamp': 1443457610,
+ 'upload_date': '20150928',
+ 'uploader_id': '694940074001',
+ },
+ }, {
+ # Brightcove id inside unique element with field--name-field-brightcove-video-id class
+ 'url': 'https://www.gameinformer.com/video-feature/new-gameplay-today/2019/07/09/new-gameplay-today-streets-of-rogue',
+ 'info_dict': {
+ 'id': '6057111913001',
+ 'ext': 'mp4',
+ 'title': 'New Gameplay Today – Streets Of Rogue',
+ 'timestamp': 1562699001,
+ 'upload_date': '20190709',
+ 'uploader_id': '694940074001',
+ },
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, display_id, headers=self.geo_verification_headers())
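+ # Newer pages expose the Brightcove id in a dedicated field element;
+ # older ones require extracting the player embed URL from the markup.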
+ brightcove_id = clean_html(get_element_by_class('field--name-field-brightcove-video-id', webpage) or get_element_by_id('video-source-content', webpage))
+ brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id if brightcove_id else BrightcoveNewIE._extract_url(self, webpage)
+ return self.url_result(brightcove_url, 'BrightcoveNew', brightcove_id)
diff --git a/hypervideo_dl/extractor/gamespot.py b/hypervideo_dl/extractor/gamespot.py
new file mode 100644
index 0000000..7a1beae
--- /dev/null
+++ b/hypervideo_dl/extractor/gamespot.py
@@ -0,0 +1,79 @@
+from __future__ import unicode_literals
+
+from .once import OnceIE
+from ..compat import compat_urllib_parse_unquote
+
+
+class GameSpotIE(OnceIE):
+ _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
+ 'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
+ 'info_dict': {
+ 'id': 'gs-2300-6410818',
+ 'ext': 'mp4',
+ 'title': 'Arma 3 - Community Guide: SITREP I',
+ 'description': 'Check out this video where some of the basics of Arma 3 is explained.',
+ },
+ 'skip': 'manifest URL gives HTTP Error 404: Not Found',
+ }, {
+ 'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
+ 'md5': '173ea87ad762cf5d3bf6163dceb255a6',
+ 'info_dict': {
+ 'id': 'gs-2300-6424837',
+ 'ext': 'mp4',
+ 'title': 'Now Playing - The Witcher 3: Wild Hunt',
+ 'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
+ },
+ }, {
+ 'url': 'https://www.gamespot.com/videos/embed/6439218/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.gamespot.com/reviews/gears-of-war-review/1900-6161188/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+ data_video = self._parse_json(self._html_search_regex(
+ r'data-video=(["\'])({.*?})\1', webpage,
+ 'video data', group=2), page_id)
+ title = compat_urllib_parse_unquote(data_video['title'])
+ streams = data_video['videoStreams']
+ formats = []
+
+ m3u8_url = streams.get('adaptive_stream')
+ if m3u8_url:
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, page_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
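+ # Each HLS rendition also appears to exist as a progressive MP4 at
+ # the same path, so synthesize an HTTP format from every m3u8 format
+ # by swapping the extension.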
+ for f in m3u8_formats:
+ formats.append(f)
+ http_f = f.copy()
+ del http_f['manifest_url']
+ http_f.update({
+ 'format_id': f['format_id'].replace('hls-', 'http-'),
+ 'protocol': 'http',
+ 'url': f['url'].replace('.m3u8', '.mp4'),
+ })
+ formats.append(http_f)
+
+ mpd_url = streams.get('adaptive_dash')
+ if mpd_url:
+ formats.extend(self._extract_mpd_formats(
+ mpd_url, page_id, mpd_id='dash', fatal=False))
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': data_video.get('guid') or page_id,
+ 'display_id': page_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': self._html_search_meta('description', webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
diff --git a/hypervideo_dl/extractor/gamestar.py b/hypervideo_dl/extractor/gamestar.py
new file mode 100644
index 0000000..f00dab2
--- /dev/null
+++ b/hypervideo_dl/extractor/gamestar.py
@@ -0,0 +1,65 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ remove_end,
+)
+
+
+class GameStarIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?game(?P<site>pro|star)\.de/videos/.*,(?P<id>[0-9]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
+ 'md5': 'ee782f1f8050448c95c5cacd63bc851c',
+ 'info_dict': {
+ 'id': '76110',
+ 'ext': 'mp4',
+ 'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
+ 'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1406542380,
+ 'upload_date': '20140728',
+ 'duration': 17,
+ }
+ }, {
+ 'url': 'http://www.gamepro.de/videos/top-10-indie-spiele-fuer-nintendo-switch-video-tolle-nindies-games-zum-download,95316.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.gamestar.de/videos/top-10-indie-spiele-fuer-nintendo-switch-video-tolle-nindies-games-zum-download,95316.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ site = mobj.group('site')
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ # TODO: there are multiple ld+json objects in the webpage,
+ # but _search_json_ld finds only the first one
+ json_ld = self._parse_json(self._search_regex(
+ r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>[^<]+VideoObject[^<]+)</script>',
+ webpage, 'JSON-LD', group='json_ld'), video_id)
+ info_dict = self._json_ld(json_ld, video_id)
+ info_dict['title'] = remove_end(
+ info_dict['title'], ' - Game%s' % site.title())
+
+ view_count = int_or_none(json_ld.get('interactionCount'))
+ comment_count = int_or_none(self._html_search_regex(
+ r'<span>Kommentare</span>\s*<span[^>]+class=["\']count[^>]+>\s*\(\s*([0-9]+)',
+ webpage, 'comment count', fatal=False))
+
+ info_dict.update({
+ 'id': video_id,
+ 'url': 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id,
+ 'ext': 'mp4',
+ 'view_count': view_count,
+ 'comment_count': comment_count
+ })
+
+ return info_dict
diff --git a/hypervideo_dl/extractor/gaskrank.py b/hypervideo_dl/extractor/gaskrank.py
new file mode 100644
index 0000000..1726a67
--- /dev/null
+++ b/hypervideo_dl/extractor/gaskrank.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ unified_strdate,
+)
+
+
+class GaskrankIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.htm'
+ _TESTS = [{
+ 'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
+ 'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
+ 'info_dict': {
+ 'id': '201601/26955',
+ 'ext': 'mp4',
+ 'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'categories': ['motorrad-fun'],
+ 'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
+ 'uploader_id': 'Bikefun',
+ 'upload_date': '20170110',
+ 'uploader_url': None,
+ }
+ }, {
+ 'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
+ 'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
+ 'info_dict': {
+ 'id': '201106/15920',
+ 'ext': 'mp4',
+ 'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'categories': ['racing'],
+ 'display_id': 'isle-of-man-tt-2011-michael-du-15920',
+ 'uploader_id': 'IOM',
+ 'upload_date': '20170523',
+ 'uploader_url': 'www.iomtt.com',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'title', webpage, fatal=True)
+
+ categories = [re.match(self._VALID_URL, url).group('categories')]
+
+ # default both to None so pages without uploader info don't raise a
+ # NameError when the result dict is built below
+ uploader_id = upload_date = None
+ mobj = re.search(
+ r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>\d{2}\.\d{2}\.\d{4})',
+ webpage)
+ if mobj is not None:
+ uploader_id = mobj.groupdict().get('uploader_id')
+ upload_date = unified_strdate(mobj.groupdict().get('upload_date'))
+
+ uploader_url = self._search_regex(
+ r'Homepage:\s*<[^>]*>(?P<uploader_url>[^<]*)',
+ webpage, 'uploader_url', default=None)
+ tags = re.findall(
+ r'/tv/tags/[^/]+/"\s*>(?P<tag>[^<]*?)<',
+ webpage)
+
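+ # Numbers on the page use German formatting: '.' as thousands
+ # separator for view counts and ',' as decimal separator for ratings,
+ # hence the replace() calls below.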
+ view_count = self._search_regex(
+ r'class\s*=\s*"gkRight"(?:[^>]*>\s*<[^>]*)*icon-eye-open(?:[^>]*>\s*<[^>]*)*>\s*(?P<view_count>[0-9\.]*)',
+ webpage, 'view_count', default=None)
+ if view_count:
+ view_count = int_or_none(view_count.replace('.', ''))
+
+ average_rating = self._search_regex(
+ r'itemprop\s*=\s*"ratingValue"[^>]*>\s*(?P<average_rating>[0-9,]+)',
+ webpage, 'average_rating')
+ if average_rating:
+ average_rating = float_or_none(average_rating.replace(',', '.'))
+
+ video_id = self._search_regex(
+ r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
+ webpage, 'video id', default=display_id)
+
+ entry = self._parse_html5_media_entries(url, webpage, video_id)[0]
+ entry.update({
+ 'id': video_id,
+ 'title': title,
+ 'categories': categories,
+ 'display_id': display_id,
+ 'uploader_id': uploader_id,
+ 'upload_date': upload_date,
+ 'uploader_url': uploader_url,
+ 'tags': tags,
+ 'view_count': view_count,
+ 'average_rating': average_rating,
+ })
+ self._sort_formats(entry['formats'])
+
+ return entry
diff --git a/hypervideo_dl/extractor/gazeta.py b/hypervideo_dl/extractor/gazeta.py
new file mode 100644
index 0000000..57c67a4
--- /dev/null
+++ b/hypervideo_dl/extractor/gazeta.py
@@ -0,0 +1,48 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class GazetaIE(InfoExtractor):
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
+ _TESTS = [{
+ 'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
+ 'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
+ 'info_dict': {
+ 'id': '205566',
+ 'ext': 'mp4',
+ 'title': '«70–80 процентов гражданских в Донецке на грани голода»',
+ 'description': 'md5:38617526050bd17b234728e7f9620a71',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ 'skip': 'video not found',
+ }, {
+ 'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.gazeta.ru/video/main/main/2015/06/22/platit_ili_ne_platit_po_isku_yukosa.shtml',
+ 'md5': '37f19f78355eb2f4256ee1688359f24c',
+ 'info_dict': {
+ 'id': '252048',
+ 'ext': 'mp4',
+ 'title': '"Если по иску ЮКОСа придется платить, это будет большой удар по бюджету"',
+ },
+ 'add_ie': ['EaglePlatform'],
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ display_id = mobj.group('id')
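+ # Request the embed variant of the page (?p=embed), which contains
+ # the EaglePlatform player id.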
+ embed_url = '%s?p=embed' % mobj.group('url')
+ embed_page = self._download_webpage(
+ embed_url, display_id, 'Downloading embed page')
+
+ video_id = self._search_regex(
+ r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id')
+
+ return self.url_result(
+ 'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform')
diff --git a/hypervideo_dl/extractor/gdcvault.py b/hypervideo_dl/extractor/gdcvault.py
new file mode 100644
index 0000000..acc6478
--- /dev/null
+++ b/hypervideo_dl/extractor/gdcvault.py
@@ -0,0 +1,220 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+from ..utils import (
+ HEADRequest,
+ remove_start,
+ sanitized_Request,
+ smuggle_url,
+ urlencode_postdata,
+)
+
+
+class GDCVaultIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)(?:/(?P<name>[\w-]+))?'
+ _NETRC_MACHINE = 'gdcvault'
+ _TESTS = [
+ {
+ 'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
+ 'md5': '7ce8388f544c88b7ac11c7ab1b593704',
+ 'info_dict': {
+ 'id': '201311826596_AWNY',
+ 'display_id': 'Doki-Doki-Universe-Sweet-Simple',
+ 'ext': 'mp4',
+ 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)'
+ }
+ },
+ {
+ 'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of',
+ 'info_dict': {
+ 'id': '201203272_1330951438328RSXR',
+ 'display_id': 'Embracing-the-Dark-Art-of',
+ 'ext': 'flv',
+ 'title': 'Embracing the Dark Art of Mathematical Modeling in AI'
+ },
+ 'params': {
+ 'skip_download': True, # Requires rtmpdump
+ }
+ },
+ {
+ 'url': 'http://www.gdcvault.com/play/1015301/Thexder-Meets-Windows-95-or',
+ 'md5': 'a5eb77996ef82118afbbe8e48731b98e',
+ 'info_dict': {
+ 'id': '1015301',
+ 'display_id': 'Thexder-Meets-Windows-95-or',
+ 'ext': 'flv',
+ 'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
+ },
+ 'skip': 'Requires login',
+ },
+ {
+ 'url': 'http://gdcvault.com/play/1020791/',
+ 'only_matching': True,
+ },
+ {
+ # Hard-coded hostname
+ 'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface',
+ 'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
+ 'info_dict': {
+ 'id': '840376_BQRC',
+ 'ext': 'mp4',
+ 'display_id': 'Tenacious-Design-and-The-Interface',
+ 'title': 'Tenacious Design and The Interface of \'Destiny\'',
+ },
+ },
+ {
+ # Multiple audios
+ 'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC',
+ 'info_dict': {
+ 'id': '12396_1299111843500GMPX',
+ 'ext': 'mp4',
+ 'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man',
+ },
+ # 'params': {
+ # 'skip_download': True, # Requires rtmpdump
+ # 'format': 'jp', # The japanese audio
+ # }
+ },
+ {
+ # gdc-player.html
+ 'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo',
+ 'info_dict': {
+ 'id': '9350_1238021887562UHXB',
+ 'display_id': 'An-American-engine-in-Tokyo',
+ 'ext': 'mp4',
+ 'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT',
+ },
+ },
+ {
+ # Kaltura Embed
+ 'url': 'https://www.gdcvault.com/play/1026180/Mastering-the-Apex-of-Scaling',
+ 'info_dict': {
+ 'id': '0_h1fg8j3p',
+ 'ext': 'mp4',
+ 'title': 'Mastering the Apex of Scaling Game Servers (Presented by Multiplay)',
+ 'timestamp': 1554401811,
+ 'upload_date': '20190404',
+ 'uploader_id': 'joe@blazestreaming.com',
+ },
+ 'params': {
+ 'format': 'mp4-408',
+ },
+ },
+ {
+ # Kaltura embed, whitespace between quote and embedded URL in iframe's src
+ 'url': 'https://www.gdcvault.com/play/1025699',
+ 'info_dict': {
+ 'id': '0_zagynv0a',
+ 'ext': 'mp4',
+ 'title': 'Tech Toolbox',
+ 'upload_date': '20190408',
+ 'uploader_id': 'joe@blazestreaming.com',
+ 'timestamp': 1554764629,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # HTML5 video
+ 'url': 'http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru',
+ 'only_matching': True,
+ },
+ ]
+
+ def _login(self, webpage_url, display_id):
+ username, password = self._get_login_info()
+ if username is None or password is None:
+ self.report_warning('It looks like %s requires a login. Specify a username and password and try again.' % webpage_url)
+ return None
+
+ mobj = re.match(r'(?P<root_url>https?://.*?/).*', webpage_url)
+ login_url = mobj.group('root_url') + 'api/login.php'
+ logout_url = mobj.group('root_url') + 'logout'
+
+ login_form = {
+ 'email': username,
+ 'password': password,
+ }
+
+ request = sanitized_Request(login_url, urlencode_postdata(login_form))
+ request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ self._download_webpage(request, display_id, 'Logging in')
+ start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
+ self._download_webpage(logout_url, display_id, 'Logging out')
+
+ return start_page
+
+ def _real_extract(self, url):
+ video_id, name = re.match(self._VALID_URL, url).groups()
+ display_id = name or video_id
+
+ webpage_url = 'http://www.gdcvault.com/play/' + video_id
+ start_page = self._download_webpage(webpage_url, display_id)
+
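+ # The oldest pages serve a direct file URL through a Flash player
+ # variable; try that first, then Kaltura embeds, then the XML-based
+ # players.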
+ direct_url = self._search_regex(
+ r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
+ start_page, 'url', default=None)
+ if direct_url:
+ title = self._html_search_regex(
+ r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',
+ start_page, 'title')
+ video_url = 'http://www.gdcvault.com' + direct_url
+ # resolve the url so that we can detect the correct extension
+ video_url = self._request_webpage(
+ HEADRequest(video_url), video_id).geturl()
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ }
+
+ embed_url = KalturaIE._extract_url(start_page)
+ if embed_url:
+ embed_url = smuggle_url(embed_url, {'source_url': url})
+ ie_key = 'Kaltura'
+ else:
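+ # Otherwise look for the DigitallySpeaking player iframe, whose src
+ # points at the XML root hosting the stream manifest.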
+ PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
+
+ xml_root = self._html_search_regex(
+ PLAYER_REGEX, start_page, 'xml root', default=None)
+ if xml_root is None:
+ # Probably need to authenticate
+ login_res = self._login(webpage_url, display_id)
+ if login_res is None:
+ self.report_warning('Could not log in.')
+ else:
+ start_page = login_res
+ # Grab the URL from the authenticated page
+ xml_root = self._html_search_regex(
+ PLAYER_REGEX, start_page, 'xml root')
+
+ xml_name = self._html_search_regex(
+ r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
+ start_page, 'xml filename', default=None)
+ if not xml_name:
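+ # No XML manifest found: fall back to the HTML5 <video> element
+ # embedded in the page.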
+ info = self._parse_html5_media_entries(url, start_page, video_id)[0]
+ info.update({
+ 'title': remove_start(self._search_regex(
+ r'>Session Name:\s*<.*?>\s*<td>(.+?)</td>', start_page,
+ 'title', default=None) or self._og_search_title(
+ start_page, default=None), 'GDC Vault - '),
+ 'id': video_id,
+ 'display_id': display_id,
+ })
+ return info
+ embed_url = '%s/xml/%s' % (xml_root, xml_name)
+ ie_key = 'DigitallySpeaking'
+
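+ # Hand the embed URL off to the player's own extractor; url_transparent
+ # keeps our id/display_id in the final result.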
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': embed_url,
+ 'ie_key': ie_key,
+ }
diff --git a/hypervideo_dl/extractor/gedidigital.py b/hypervideo_dl/extractor/gedidigital.py
new file mode 100644
index 0000000..6c4153b
--- /dev/null
+++ b/hypervideo_dl/extractor/gedidigital.py
@@ -0,0 +1,161 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+)
+
+
+class GediDigitalIE(InfoExtractor):
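+ # Covers the GEDI group's national titles (Repubblica, La Stampa,
+ # Il Secolo XIX) and the local *.gelocal.it network sites.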
+ _VALID_URL = r'''(?x)https?://video\.
+ (?:
+ (?:
+ (?:espresso\.)?repubblica
+ |lastampa
+ |ilsecoloxix
+ )|
+ (?:
+ iltirreno
+ |messaggeroveneto
+ |ilpiccolo
+ |gazzettadimantova
+ |mattinopadova
+ |laprovinciapavese
+ |tribunatreviso
+ |nuovavenezia
+ |gazzettadimodena
+ |lanuovaferrara
+ |corrierealpi
+ |lasentinella
+ )\.gelocal
+ )\.it(?:/[^/]+){2,3}?/(?P<id>\d+)(?:[/?&#]|$)'''
+ _TESTS = [{
+ 'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
+ 'md5': '84658d7fb9e55a6e57ecc77b73137494',
+ 'info_dict': {
+ 'id': '121559',
+ 'ext': 'mp4',
+ 'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso',
+ 'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca',
+ 'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-full-.+?\.jpg$',
+ 'duration': 125,
+ },
+ }, {
+ 'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.repubblica.it/motori/record-della-pista-a-spa-francorchamps-la-pagani-huayra-roadster-bc-stupisce/367415/367963',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.ilsecoloxix.it/sport/cassani-e-i-brividi-azzurri-ai-mondiali-di-imola-qui-mi-sono-innamorato-del-ciclismo-da-ragazzino-incredibile-tornarci-da-ct/66184/66267',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.iltirreno.gelocal.it/sport/dentro-la-notizia-ferrari-cosa-succede-a-maranello/141059/142723',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.messaggeroveneto.gelocal.it/locale/maria-giovanna-elmi-covid-vaccino/138155/139268',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.ilpiccolo.gelocal.it/dossier/big-john/dinosauro-big-john-al-via-le-visite-guidate-a-trieste/135226/135751',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.gazzettadimantova.gelocal.it/locale/dal-ponte-visconteo-di-valeggio-l-and-8217sos-dei-ristoratori-aprire-anche-a-cena/137310/137818',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.mattinopadova.gelocal.it/dossier/coronavirus-in-veneto/covid-a-vo-un-anno-dopo-un-cuore-tricolore-per-non-dimenticare/138402/138964',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.laprovinciapavese.gelocal.it/locale/mede-zona-rossa-via-alle-vaccinazioni-per-gli-over-80/137545/138120',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.tribunatreviso.gelocal.it/dossier/coronavirus-in-veneto/ecco-le-prima-vaccinazioni-di-massa-nella-marca/134485/135024',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.nuovavenezia.gelocal.it/locale/camion-troppo-alto-per-il-ponte-ferroviario-perde-il-carico/135734/136266',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.gazzettadimodena.gelocal.it/locale/modena-scoperta-la-proteina-che-predice-il-livello-di-gravita-del-covid/139109/139796',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.lanuovaferrara.gelocal.it/locale/due-bombole-di-gpl-aperte-e-abbandonate-i-vigili-bruciano-il-gas/134391/134957',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.corrierealpi.gelocal.it/dossier/cortina-2021-i-mondiali-di-sci-alpino/mondiali-di-sci-il-timelapse-sulla-splendida-olympia/133760/134331',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.lasentinella.gelocal.it/locale/vestigne-centra-un-auto-e-si-ribalta/138931/139466',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.espresso.repubblica.it/tutti-i-video/01-ted-villa/14772',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_search_meta(
+ ['twitter:title', 'og:title'], webpage, fatal=True)
+ player_data = re.findall(
+ r"PlayerFactory\.setParam\('(?P<type>format|param)',\s*'(?P<name>[^']+)',\s*'(?P<val>[^']+)'\);",
+ webpage)
+
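+ # Each PlayerFactory.setParam() call registers either a 'format'
+ # (format name -> stream URL) or a 'param' (thumbnail, duration, ...).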
+ formats = []
+ duration = thumb = None
+ for t, n, v in player_data:
+ if t == 'format':
+ if n in ('video-hds-vod-ec', 'video-hls-vod-ec', 'video-viralize', 'video-youtube-pfp'):
+ continue
+ elif n.endswith('-vod-ak'):
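+ # Akamai-hosted VOD: expand the manifest URL into the available
+ # HTTP/HLS/HDS variants.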
+ formats.extend(self._extract_akamai_formats(
+ v, video_id, {'http': 'media.gedidigital.it'}))
+ else:
+ ext = determine_ext(v)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ v, video_id, 'mp4', 'm3u8_native', m3u8_id=n, fatal=False))
+ continue
+ f = {
+ 'format_id': n,
+ 'url': v,
+ }
+ if ext == 'mp3':
+ abr = int_or_none(self._search_regex(
+ r'-mp3-audio-(\d+)', v, 'abr', default=None))
+ f.update({
+ 'abr': abr,
+ 'tbr': abr,
+ 'vcodec': 'none'
+ })
+ else:
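+ # Progressive video format IDs look like "video-rrtv-<height>[-<vbr>]".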
+ mobj = re.match(r'^video-rrtv-(\d+)(?:-(\d+))?$', n)
+ if mobj:
+ f.update({
+ 'height': int(mobj.group(1)),
+ 'vbr': int_or_none(mobj.group(2)),
+ })
+ if not f.get('vbr'):
+ f['vbr'] = int_or_none(self._search_regex(
+ r'-video-rrtv-(\d+)', v, 'vbr', default=None))
+ formats.append(f)
+ elif t == 'param':
+ if n in ['image_full', 'image']:
+ thumb = v
+ elif n == 'videoDuration':
+ duration = int_or_none(v)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': self._html_search_meta(
+ ['twitter:description', 'og:description', 'description'], webpage),
+ 'thumbnail': thumb or self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ 'duration': duration,
+ }
diff --git a/hypervideo_dl/extractor/generic.py b/hypervideo_dl/extractor/generic.py
new file mode 100644
index 0000000..7da038a
--- /dev/null
+++ b/hypervideo_dl/extractor/generic.py
@@ -0,0 +1,3597 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import os
+import re
+import sys
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..compat import (
+ compat_etree_fromstring,
+ compat_str,
+ compat_urllib_parse_unquote,
+ compat_urlparse,
+ compat_xml_parse_error,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ HEADRequest,
+ int_or_none,
+ is_html,
+ js_to_json,
+ KNOWN_EXTENSIONS,
+ merge_dicts,
+ mimetype2ext,
+ orderedSet,
+ parse_duration,
+ sanitized_Request,
+ smuggle_url,
+ unescapeHTML,
+ unified_timestamp,
+ unsmuggle_url,
+ UnsupportedError,
+ url_or_none,
+ xpath_attr,
+ xpath_text,
+ xpath_with_ns,
+)
+from .commonprotocols import RtmpIE
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
+from .nexx import (
+ NexxIE,
+ NexxEmbedIE,
+)
+from .nbc import NBCSportsVPlayerIE
+from .ooyala import OoyalaIE
+from .rutv import RUTVIE
+from .tvc import TVCIE
+from .sportbox import SportBoxIE
+from .myvi import MyviIE
+from .condenast import CondeNastIE
+from .udn import UDNEmbedIE
+from .senateisvp import SenateISVPIE
+from .svt import SVTIE
+from .pornhub import PornHubIE
+from .xhamster import XHamsterEmbedIE
+from .tnaflix import TNAFlixNetworkEmbedIE
+from .drtuber import DrTuberIE
+from .redtube import RedTubeIE
+from .tube8 import Tube8IE
+from .mofosex import MofosexEmbedIE
+from .spankwire import SpankwireIE
+from .youporn import YouPornIE
+from .vimeo import (
+ VimeoIE,
+ VHXEmbedIE,
+)
+from .dailymotion import DailymotionIE
+from .dailymail import DailyMailIE
+from .onionstudios import OnionStudiosIE
+from .viewlift import ViewLiftEmbedIE
+from .mtv import MTVServicesEmbeddedIE
+from .pladform import PladformIE
+from .videomore import VideomoreIE
+from .webcaster import WebcasterFeedIE
+from .googledrive import GoogleDriveIE
+from .jwplatform import JWPlatformIE
+from .digiteka import DigitekaIE
+from .arkena import ArkenaIE
+from .instagram import InstagramIE
+from .liveleak import LiveLeakIE
+from .threeqsdn import ThreeQSDNIE
+from .theplatform import ThePlatformIE
+from .kaltura import KalturaIE
+from .eagleplatform import EaglePlatformIE
+from .facebook import FacebookIE
+from .soundcloud import SoundcloudEmbedIE
+from .tunein import TuneInBaseIE
+from .vbox7 import Vbox7IE
+from .dbtv import DBTVIE
+from .piksel import PikselIE
+from .videa import VideaIE
+from .twentymin import TwentyMinutenIE
+from .ustream import UstreamIE
+from .arte import ArteTVEmbedIE
+from .videopress import VideoPressIE
+from .rutube import RutubeIE
+from .limelight import LimelightBaseIE
+from .anvato import AnvatoIE
+from .washingtonpost import WashingtonPostIE
+from .wistia import WistiaIE
+from .mediaset import MediasetIE
+from .joj import JojIE
+from .megaphone import MegaphoneIE
+from .vzaar import VzaarIE
+from .channel9 import Channel9IE
+from .vshare import VShareIE
+from .mediasite import MediasiteIE
+from .springboardplatform import SpringboardPlatformIE
+from .yapfiles import YapFilesIE
+from .vice import ViceIE
+from .xfileshare import XFileShareIE
+from .cloudflarestream import CloudflareStreamIE
+from .peertube import PeerTubeIE
+from .teachable import TeachableIE
+from .indavideo import IndavideoEmbedIE
+from .apa import APAIE
+from .foxnews import FoxNewsIE
+from .viqeo import ViqeoIE
+from .expressen import ExpressenIE
+from .zype import ZypeIE
+from .odnoklassniki import OdnoklassnikiIE
+from .vk import VKIE
+from .kinja import KinjaEmbedIE
+from .arcpublishing import ArcPublishingIE
+from .medialaan import MedialaanIE
+from .simplecast import SimplecastIE
+
+
+class GenericIE(InfoExtractor):
+ IE_DESC = 'Generic downloader that works on some sites'
+ _VALID_URL = r'.*'
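+ # Matches any URL; this extractor is tried only after all the
+ # site-specific extractors have declined the URL.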
+ IE_NAME = 'generic'
+ _TESTS = [
+ # Direct link to a video
+ {
+ 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
+ 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
+ 'info_dict': {
+ 'id': 'trailer',
+ 'ext': 'mp4',
+ 'title': 'trailer',
+ 'upload_date': '20100513',
+ }
+ },
+ # Direct link to media delivered compressed (unless Accept-Encoding is *)
+ {
+ 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
+ 'md5': '128c42e68b13950268b648275386fc74',
+ 'info_dict': {
+ 'id': 'FictionJunction-Parallel_Hearts',
+ 'ext': 'flac',
+ 'title': 'FictionJunction-Parallel_Hearts',
+ 'upload_date': '20140522',
+ },
+ 'expected_warnings': [
+ 'URL could be a direct video link, returning it as such.'
+ ],
+ 'skip': 'URL invalid',
+ },
+ # Direct download with broken HEAD
+ {
+ 'url': 'http://ai-radio.org:8000/radio.opus',
+ 'info_dict': {
+ 'id': 'radio',
+ 'ext': 'opus',
+ 'title': 'radio',
+ },
+ 'params': {
+ 'skip_download': True, # infinite live stream
+ },
+ 'expected_warnings': [
+ r'501.*Not Implemented',
+ r'400.*Bad Request',
+ ],
+ },
+ # Direct link with incorrect MIME type
+ {
+ 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+ 'md5': '4ccbebe5f36706d85221f204d7eb5913',
+ 'info_dict': {
+ 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+ 'id': '5_Lennart_Poettering_-_Systemd',
+ 'ext': 'webm',
+ 'title': '5_Lennart_Poettering_-_Systemd',
+ 'upload_date': '20141120',
+ },
+ 'expected_warnings': [
+ 'URL could be a direct video link, returning it as such.'
+ ]
+ },
+ # RSS feed
+ {
+ 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+ 'info_dict': {
+ 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+ 'title': 'Zero Punctuation',
+ 'description': 're:.*groundbreaking video review series.*'
+ },
+ 'playlist_mincount': 11,
+ },
+ # RSS feed with enclosure
+ {
+ 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
+ 'info_dict': {
+ 'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
+ 'title': 'MSNBC Rachel Maddow (video)',
+ 'description': 're:.*her unique approach to storytelling.*',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'ext': 'mov',
+ 'id': 'pdv_maddow_netcast_mov-12-04-2020-224335',
+ 'title': 're:MSNBC Rachel Maddow',
+ 'description': 're:.*her unique approach to storytelling.*',
+ 'timestamp': int,
+ 'upload_date': compat_str,
+ 'duration': float,
+ },
+ }],
+ },
+ # RSS feed with item with description and thumbnails
+ {
+ 'url': 'https://anchor.fm/s/dd00e14/podcast/rss',
+ 'info_dict': {
+ 'id': 'https://anchor.fm/s/dd00e14/podcast/rss',
+ 'title': 're:.*100% Hydrogen.*',
+ 'description': 're:.*In this episode.*',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'ext': 'm4a',
+ 'id': 'c1c879525ce2cb640b344507e682c36d',
+ 'title': 're:Hydrogen!',
+ 'description': 're:.*In this episode we are going.*',
+ 'timestamp': 1567977776,
+ 'upload_date': '20190908',
+ 'duration': 459,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'episode_number': 1,
+ 'season_number': 1,
+ 'age_limit': 0,
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # RSS feed with enclosures and unsupported link URLs
+ {
+ 'url': 'http://www.hellointernet.fm/podcast?format=rss',
+ 'info_dict': {
+ 'id': 'http://www.hellointernet.fm/podcast?format=rss',
+ 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
+ 'title': 'Hello Internet',
+ },
+ 'playlist_mincount': 100,
+ },
+ # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
+ {
+ 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
+ 'info_dict': {
+ 'id': 'smil',
+ 'ext': 'mp4',
+ 'title': 'Automatics, robotics and biocybernetics',
+ 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+ 'upload_date': '20130627',
+ 'formats': 'mincount:16',
+ 'subtitles': 'mincount:1',
+ },
+ 'params': {
+ 'force_generic_extractor': True,
+ 'skip_download': True,
+ },
+ },
+ # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
+ {
+ 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
+ 'info_dict': {
+ 'id': 'hds',
+ 'ext': 'flv',
+ 'title': 'hds',
+ 'formats': 'mincount:1',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # SMIL from https://www.restudy.dk/video/play/id/1637
+ {
+ 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
+ 'info_dict': {
+ 'id': 'video_1637',
+ 'ext': 'flv',
+ 'title': 'video_1637',
+ 'formats': 'mincount:3',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
+ {
+ 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
+ 'info_dict': {
+ 'id': 'smil-service',
+ 'ext': 'flv',
+ 'title': 'smil-service',
+ 'formats': 'mincount:1',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
+ {
+ 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
+ 'info_dict': {
+ 'id': '4719370',
+ 'ext': 'mp4',
+ 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
+ 'formats': 'mincount:3',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
+ {
+ 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
+ 'info_dict': {
+ 'id': 'mZlp2ctYIUEB',
+ 'ext': 'mp4',
+ 'title': 'Tikibad ontruimd wegens brand',
+ 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 33,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # MPD from http://dash-mse-test.appspot.com/media.html
+ {
+ 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
+ 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
+ 'info_dict': {
+ 'id': 'car-20120827-manifest',
+ 'ext': 'mp4',
+ 'title': 'car-20120827-manifest',
+ 'formats': 'mincount:9',
+ 'upload_date': '20130904',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ },
+ # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
+ {
+ 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
+ 'info_dict': {
+ 'id': 'content',
+ 'ext': 'mp4',
+ 'title': 'content',
+ 'formats': 'mincount:8',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'skip': 'video gone',
+ },
+ # m3u8 served with Content-Type: text/plain
+ {
+ 'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
+ 'info_dict': {
+ 'id': 'index',
+ 'ext': 'mp4',
+ 'title': 'index',
+ 'upload_date': '20140720',
+ 'formats': 'mincount:11',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'skip': 'video gone',
+ },
+ # google redirect
+ {
+ 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
+ 'info_dict': {
+ 'id': 'cmQHVoWB5FY',
+ 'ext': 'mp4',
+ 'upload_date': '20130224',
+ 'uploader_id': 'TheVerge',
+ 'description': r're:^Chris Ziegler takes a look at the.*',
+ 'uploader': 'The Verge',
+ 'title': 'First Firefox OS phones side-by-side',
+ },
+ 'params': {
+ 'skip_download': False,
+ }
+ },
+ {
+ # redirect in Refresh HTTP header
+ 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
+ 'info_dict': {
+ 'id': 'pO8h3EaFRdo',
+ 'ext': 'mp4',
+ 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
+ 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
+ 'upload_date': '20150917',
+ 'uploader_id': 'brtvofficial',
+ 'uploader': 'Boiler Room',
+ },
+ 'params': {
+ 'skip_download': False,
+ },
+ },
+ {
+ 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
+ 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
+ 'info_dict': {
+ 'id': '13601338388002',
+ 'ext': 'mp4',
+ 'uploader': 'www.hodiho.fr',
+ 'title': 'R\u00e9gis plante sa Jeep',
+ }
+ },
+ # bandcamp page with custom domain
+ {
+ 'add_ie': ['Bandcamp'],
+ 'url': 'http://bronyrock.com/track/the-pony-mash',
+ 'info_dict': {
+ 'id': '3235767654',
+ 'ext': 'mp3',
+ 'title': 'The Pony Mash',
+ 'uploader': 'M_Pallante',
+ },
+ 'skip': 'There is a limit of 200 free downloads / month for the test song',
+ },
+ {
+ # embedded brightcove video
+ # it also tests brightcove videos that need to set the 'Referer'
+ # in the http requests
+ 'add_ie': ['BrightcoveLegacy'],
+ 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
+ 'info_dict': {
+ 'id': '2765128793001',
+ 'ext': 'mp4',
+ 'title': 'Le cours de bourse : l’analyse technique',
+ 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
+ 'uploader': 'BFM BUSINESS',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # embedded with itemprop embedURL and video id spelled as `idVideo`
+ 'add_ie': ['BrightcoveLegacy'],
+ 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
+ 'info_dict': {
+ 'id': '5255628253001',
+ 'ext': 'mp4',
+ 'title': 'md5:37c519b1128915607601e75a87995fc0',
+ 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
+ 'uploader': 'BFM BUSINESS',
+ 'uploader_id': '876450612001',
+ 'timestamp': 1482255315,
+ 'upload_date': '20161220',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # https://github.com/ytdl-org/youtube-dl/issues/2253
+ 'url': 'http://bcove.me/i6nfkrc3',
+ 'md5': '0ba9446db037002366bab3b3eb30c88c',
+ 'info_dict': {
+ 'id': '3101154703001',
+ 'ext': 'mp4',
+ 'title': 'Still no power',
+ 'uploader': 'thestar.com',
+ 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
+ },
+ 'add_ie': ['BrightcoveLegacy'],
+ 'skip': 'video gone',
+ },
+ {
+ 'url': 'http://www.championat.com/video/football/v/87/87499.html',
+ 'md5': 'fb973ecf6e4a78a67453647444222983',
+ 'info_dict': {
+ 'id': '3414141473001',
+ 'ext': 'mp4',
+ 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
+ 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
+ 'uploader': 'Championat',
+ },
+ },
+ {
+ # https://github.com/ytdl-org/youtube-dl/issues/3541
+ 'add_ie': ['BrightcoveLegacy'],
+ 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
+ 'info_dict': {
+ 'id': '3866516442001',
+ 'ext': 'mp4',
+ 'title': 'Leer mij vrouwen kennen: Aflevering 1',
+ 'description': 'Leer mij vrouwen kennen: Aflevering 1',
+ 'uploader': 'SBS Broadcasting',
+ },
+ 'skip': 'Restricted to Netherlands',
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ },
+ {
+ # Brightcove video in <iframe>
+ 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
+ 'md5': '36d74ef5e37c8b4a2ce92880d208b968',
+ 'info_dict': {
+ 'id': '5360463607001',
+ 'ext': 'mp4',
+ 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
+ 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
+ 'uploader': 'United Nations',
+ 'uploader_id': '1362235914001',
+ 'timestamp': 1489593889,
+ 'upload_date': '20170315',
+ },
+ 'add_ie': ['BrightcoveLegacy'],
+ },
+ {
+ # Brightcove with alternative playerID key
+ 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
+ 'info_dict': {
+ 'id': 'nmeth.2062_SV1',
+ 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2228375078001',
+ 'ext': 'mp4',
+ 'title': 'nmeth.2062-sv1',
+ 'description': 'nmeth.2062-sv1',
+ 'timestamp': 1363357591,
+ 'upload_date': '20130315',
+ 'uploader': 'Nature Publishing Group',
+ 'uploader_id': '1964492299001',
+ },
+ }],
+ },
+ {
+ # Brightcove with UUID in videoPlayer
+ 'url': 'http://www8.hp.com/cn/zh/home.html',
+ 'info_dict': {
+ 'id': '5255815316001',
+ 'ext': 'mp4',
+ 'title': 'Sprocket Video - China',
+ 'description': 'Sprocket Video - China',
+ 'uploader': 'HP-Video Gallery',
+ 'timestamp': 1482263210,
+ 'upload_date': '20161220',
+ 'uploader_id': '1107601872001',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ 'skip': 'video rotates...weekly?',
+ },
+ {
+ # Brightcove:new type [2].
+ 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
+ 'md5': '2b35148fcf48da41c9fb4591650784f3',
+ 'info_dict': {
+ 'id': '5348741021001',
+ 'ext': 'mp4',
+ 'upload_date': '20170306',
+ 'uploader_id': '4191638492001',
+ 'timestamp': 1488769918,
+ 'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
+ },
+ },
+ {
+ # Alternative brightcove <video> attributes
+ 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
+ 'info_dict': {
+ 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
+ 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
+ },
+ 'playlist': [{
+ 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
+ 'info_dict': {
+ 'id': '5311302538001',
+ 'ext': 'mp4',
+ 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
+ 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
+ 'timestamp': 1486321708,
+ 'upload_date': '20170205',
+ 'uploader_id': '800000640001',
+ },
+ 'only_matching': True,
+ }],
+ },
+ # ooyala video
+ {
+ 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
+ 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
+ 'info_dict': {
+ 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
+ 'ext': 'mp4',
+ 'title': '2cc213299525360.mov', # that's what we get
+ 'duration': 238.231,
+ },
+ 'add_ie': ['Ooyala'],
+ },
+ {
+ # ooyala video embedded with http://player.ooyala.com/iframe.js
+ 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
+ 'info_dict': {
+ 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
+ 'ext': 'mp4',
+ 'title': '"Steve Jobs: Man in the Machine" trailer',
+ 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
+ 'duration': 135.427,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'movie expired',
+ },
+ # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
+ {
+ 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
+ 'info_dict': {
+ 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
+ 'ext': 'mp4',
+ 'title': 'Steampunk Fest Comes to Honesdale',
+ 'duration': 43.276,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # embed.ly video
+ {
+ 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
+ 'info_dict': {
+ 'id': '9ODmcdjQcHQ',
+ 'ext': 'mp4',
+ 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
+ 'upload_date': '20140225',
+ 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
+ 'uploader': 'Tested',
+ 'uploader_id': 'testedcom',
+ },
+ # No need to test YoutubeIE here
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # funnyordie embed
+ {
+ 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
+ 'info_dict': {
+ 'id': '18e820ec3f',
+ 'ext': 'mp4',
+ 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
+ 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
+ },
+ # HEAD requests lead to endless 301, while GET is OK
+ 'expected_warnings': ['301'],
+ },
+ # RUTV embed
+ {
+ 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
+ 'info_dict': {
+ 'id': '776940',
+ 'ext': 'mp4',
+ 'title': 'Охотское море стало целиком российским',
+ 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ # TVC embed
+ {
+ 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
+ 'info_dict': {
+ 'id': '55304',
+ 'ext': 'mp4',
+ 'title': 'Дошкольное воспитание',
+ },
+ },
+ # SportBox embed
+ {
+ 'url': 'http://www.vestifinance.ru/articles/25753',
+ 'info_dict': {
+ 'id': '25753',
+ 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '370908',
+ 'title': 'Госзаказ. День 3',
+ 'ext': 'mp4',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '370905',
+ 'title': 'Госзаказ. День 2',
+ 'ext': 'mp4',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '370902',
+ 'title': 'Госзаказ. День 1',
+ 'ext': 'mp4',
+ }
+ }],
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ # Myvi.ru embed
+ {
+ 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
+ 'info_dict': {
+ 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
+ 'ext': 'mp4',
+ 'title': 'Ужастики, русский трейлер (2015)',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 153,
+ }
+ },
+ # XHamster embed
+ {
+ 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
+ 'info_dict': {
+ 'id': 'showthread',
+ 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
+ },
+ 'playlist_mincount': 7,
+ # This forum no longer allows the <iframe> syntax;
+ # HTML tags are now displayed as-is
+ 'skip': 'No videos on this page',
+ },
+ # Embedded TED video
+ {
+ 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
+ 'md5': '65fdff94098e4a607385a60c5177c638',
+ 'info_dict': {
+ 'id': '1969',
+ 'ext': 'mp4',
+ 'title': 'Hidden miracles of the natural world',
+ 'uploader': 'Louie Schwartzberg',
+ 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
+ }
+ },
+ # nowvideo embed hidden behind percent encoding
+ {
+ 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
+ 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
+ 'info_dict': {
+ 'id': '06e53103ca9aa',
+ 'ext': 'flv',
+ 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
+ 'description': 'No description',
+ },
+ },
+ # arte embed
+ {
+ 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
+ 'md5': '7653032cbb25bf6c80d80f217055fa43',
+ 'info_dict': {
+ 'id': '048195-004_PLUS7-F',
+ 'ext': 'flv',
+ 'title': 'X:enius',
+ 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
+ 'upload_date': '20140320',
+ },
+ 'params': {
+ 'skip_download': 'Requires rtmpdump'
+ },
+ 'skip': 'video gone',
+ },
+ # francetv embed
+ {
+ 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
+ 'info_dict': {
+ 'id': 'EV_30231',
+ 'ext': 'mp4',
+ 'title': 'Alcaline, le concert avec Calogero',
+ 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
+ 'upload_date': '20150226',
+ 'timestamp': 1424989860,
+ 'duration': 5400,
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'Forbidden'
+ ]
+ },
+ # Condé Nast embed
+ {
+ 'url': 'http://www.wired.com/2014/04/honda-asimo/',
+ 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
+ 'info_dict': {
+ 'id': '53501be369702d3275860000',
+ 'ext': 'mp4',
+ 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
+ }
+ },
+ # Dailymotion embed
+ {
+ 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
+ 'md5': '441aeeb82eb72c422c7f14ec533999cd',
+ 'info_dict': {
+ 'id': 'k2mm4bCdJ6CQ2i7c8o2',
+ 'ext': 'mp4',
+ 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
+ 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
+ 'uploader': 'Spi0n',
+ 'uploader_id': 'xgditw',
+ 'upload_date': '20140425',
+ 'timestamp': 1398441542,
+ },
+ 'add_ie': ['Dailymotion'],
+ },
+ # DailyMail embed
+ {
+ 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
+ 'info_dict': {
+ 'id': '1495629',
+ 'ext': 'mp4',
+ 'title': 'Care worker punches elderly dementia patient in head 11 times',
+ 'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
+ },
+ 'add_ie': ['DailyMail'],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # YouTube embed
+ {
+ 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
+ 'info_dict': {
+ 'id': 'FXRb4ykk4S0',
+ 'ext': 'mp4',
+ 'title': 'The NBL Auction 2014',
+ 'uploader': 'BADMINTON England',
+ 'uploader_id': 'BADMINTONEvents',
+ 'upload_date': '20140603',
+ 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
+ },
+ 'add_ie': ['Youtube'],
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # MTVServices embed
+ {
+ 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
+ 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
+ 'info_dict': {
+ 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
+ 'ext': 'mp4',
+ 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
+ 'description': 'Two valets share their love for movie star Liam Neesons.',
+ 'timestamp': 1349922600,
+ 'upload_date': '20121011',
+ },
+ },
+ # YouTube embed via <data-embed-url="">
+ {
+ 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
+ 'info_dict': {
+ 'id': '4vAffPZIT44',
+ 'ext': 'mp4',
+ 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
+ 'uploader': 'Gameloft',
+ 'uploader_id': 'gameloft',
+ 'upload_date': '20140828',
+ 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # YouTube <object> embed
+ {
+ 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
+ 'md5': '516718101ec834f74318df76259fb3cc',
+ 'info_dict': {
+ 'id': 'msN87y-iEx0',
+ 'ext': 'webm',
+ 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
+ 'upload_date': '20080526',
+ 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
+ 'uploader': 'Christopher Sykes',
+ 'uploader_id': 'ChristopherJSykes',
+ },
+ 'add_ie': ['Youtube'],
+ },
+ # Camtasia studio
+ {
+ 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
+ 'playlist': [{
+ 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
+ 'info_dict': {
+ 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
+ 'ext': 'flv',
+ 'duration': 2235.90,
+ }
+ }, {
+ 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
+ 'info_dict': {
+ 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
+ 'ext': 'flv',
+ 'duration': 2235.93,
+ }
+ }],
+ 'info_dict': {
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+ }
+ },
+ # Flowplayer
+ {
+ 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
+ 'md5': '9d65602bf31c6e20014319c7d07fba27',
+ 'info_dict': {
+ 'id': '5123ea6d5e5a7',
+ 'ext': 'mp4',
+ 'age_limit': 18,
+ 'uploader': 'www.handjobhub.com',
+ 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
+ }
+ },
+ # Multiple brightcove videos
+ # https://github.com/ytdl-org/youtube-dl/issues/2283
+ {
+ 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
+ 'info_dict': {
+ 'id': 'always-never',
+ 'title': 'Always / Never - The New Yorker',
+ },
+ 'playlist_count': 3,
+ 'params': {
+ 'extract_flat': False,
+ 'skip_download': True,
+ }
+ },
+ # MLB embed
+ {
+ 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
+ 'md5': '96f09a37e44da40dd083e12d9a683327',
+ 'info_dict': {
+ 'id': '33322633',
+ 'ext': 'mp4',
+ 'title': 'Ump changes call to ball',
+ 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
+ 'duration': 48,
+ 'timestamp': 1401537900,
+ 'upload_date': '20140531',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ # Wistia embed
+ {
+ 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
+ 'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
+ 'info_dict': {
+ 'id': '6e2wtrbdaf',
+ 'ext': 'mov',
+ 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
+ 'description': 'a Paywall Videos video from Remilon',
+ 'duration': 644.072,
+ 'uploader': 'study.com',
+ 'timestamp': 1459678540,
+ 'upload_date': '20160403',
+ 'filesize': 24687186,
+ },
+ },
+ {
+ 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
+ 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
+ 'info_dict': {
+ 'id': 'uxjb0lwrcz',
+ 'ext': 'mp4',
+ 'title': 'Conversation about Hexagonal Rails Part 1',
+ 'description': 'a Martin Fowler video from ThoughtWorks',
+ 'duration': 1715.0,
+ 'uploader': 'thoughtworks.wistia.com',
+ 'timestamp': 1401832161,
+ 'upload_date': '20140603',
+ },
+ },
+ # Wistia standard embed (async)
+ {
+ 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
+ 'info_dict': {
+ 'id': '807fafadvk',
+ 'ext': 'mp4',
+ 'title': 'Drip Brennan Dunn Workshop',
+ 'description': 'a JV Webinars video from getdrip-1',
+ 'duration': 4986.95,
+ 'timestamp': 1463607249,
+ 'upload_date': '20160518',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # Soundcloud embed
+ {
+ 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
+ 'info_dict': {
+ 'id': '174391317',
+ 'ext': 'mp3',
+ 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
+ 'uploader': 'Sophos Security',
+ 'title': 'Chet Chat 171 - Oct 29, 2014',
+ 'upload_date': '20141029',
+ }
+ },
+ # Soundcloud multiple embeds
+ {
+ 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
+ 'info_dict': {
+ 'id': '52809',
+ 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
+ },
+ 'playlist_mincount': 7,
+ },
+ # TuneIn station embed
+ {
+ 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
+ 'info_dict': {
+ 'id': '204146',
+ 'ext': 'mp3',
+ 'title': 'CNRV',
+ 'location': 'Paris, France',
+ 'is_live': True,
+ },
+ 'params': {
+ # Live stream
+ 'skip_download': True,
+ },
+ },
+ # Livestream embed
+ {
+ 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
+ 'info_dict': {
+ 'id': '67864563',
+ 'ext': 'flv',
+ 'upload_date': '20141112',
+ 'title': 'Rosetta #CometLanding webcast HL 10',
+ }
+ },
+ # Another Livestream embed, without 'new.' in URL
+ {
+ 'url': 'https://www.freespeech.org/',
+ 'info_dict': {
+ 'id': '123537347',
+ 'ext': 'mp4',
+ 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ # Live stream
+ 'skip_download': True,
+ },
+ },
+ # LazyYT
+ {
+ 'url': 'https://skiplagged.com/',
+ 'info_dict': {
+ 'id': 'skiplagged',
+ 'title': 'Skiplagged: The smart way to find cheap flights',
+ },
+ 'playlist_mincount': 1,
+ 'add_ie': ['Youtube'],
+ },
+ # Cinchcast embed
+ {
+ 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
+ 'info_dict': {
+ 'id': '7141703',
+ 'ext': 'mp3',
+ 'upload_date': '20141126',
+ 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
+ }
+ },
+ # Cinerama player
+ {
+ 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
+ 'info_dict': {
+ 'id': '730m_DandD_1901_512k',
+ 'ext': 'mp4',
+ 'uploader': 'www.abc.net.au',
+ 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
+ }
+ },
+ # embedded viddler video
+ {
+ 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
+ 'info_dict': {
+ 'id': '4d03aad9',
+ 'ext': 'mp4',
+ 'uploader': 'deadspin',
+ 'title': 'WALL-TO-GORTAT',
+ 'timestamp': 1422285291,
+ 'upload_date': '20150126',
+ },
+ 'add_ie': ['Viddler'],
+ },
+ # Libsyn embed
+ {
+ 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
+ 'info_dict': {
+ 'id': '3377616',
+ 'ext': 'mp3',
+ 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+ 'description': 'md5:601cb790edd05908957dae8aaa866465',
+ 'upload_date': '20150220',
+ },
+ 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
+ },
+ # jwplayer YouTube
+ {
+ 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
+ 'info_dict': {
+ 'id': 'Mrj4DVp2zeA',
+ 'ext': 'mp4',
+ 'upload_date': '20150212',
+ 'uploader': 'The National Archives UK',
+ 'description': 'md5:8078af856dca76edc42910b61273dbbf',
+ 'uploader_id': 'NationalArchives08',
+ 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
+ },
+ },
+ # jwplayer rtmp
+ {
+ 'url': 'http://www.suffolk.edu/sjc/live.php',
+ 'info_dict': {
+ 'id': 'live',
+ 'ext': 'flv',
+ 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
+ 'uploader': 'www.suffolk.edu',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
+ },
+ # Complex jwplayer
+ {
+ 'url': 'http://www.indiedb.com/games/king-machine/videos',
+ 'info_dict': {
+ 'id': 'videos',
+ 'ext': 'mp4',
+ 'title': 'king machine trailer 1',
+ 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ # JWPlayer config passed as variable
+ 'url': 'http://www.txxx.com/videos/3326530/ariele/',
+ 'info_dict': {
+ 'id': '3326530_hq',
+ 'ext': 'mp4',
+ 'title': 'ARIELE | Tube Cup',
+ 'uploader': 'www.txxx.com',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ {
+ # JWPlatform iframe
+ 'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
+ 'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
+ 'info_dict': {
+ 'id': 'O0c5JcKT',
+ 'ext': 'mp4',
+ 'upload_date': '20171122',
+ 'timestamp': 1511366290,
+ 'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
+ },
+ 'add_ie': [JWPlatformIE.ie_key()],
+ },
+ {
+ # Video.js embed, multiple formats
+ 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
+ 'info_dict': {
+ 'id': 'yygqldloqIk',
+ 'ext': 'mp4',
+ 'title': 'SolidWorks. Урок 6 Настройка чертежа',
+ 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
+ 'upload_date': '20130314',
+ 'uploader': 'PROстое3D',
+ 'uploader_id': 'PROstoe3D',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Video.js embed, single format
+ 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
+ 'info_dict': {
+ 'id': 'watch',
+ 'ext': 'mp4',
+ 'title': 'Step 1 - Good Foundation',
+ 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # rtl.nl embed
+ {
+ 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'aanslagen-kopenhagen',
+ 'title': 'Aanslagen Kopenhagen',
+ }
+ },
+ # Zapiks embed
+ {
+ 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
+ 'info_dict': {
+ 'id': '118046',
+ 'ext': 'mp4',
+ 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
+ }
+ },
+ # Kaltura embed (different embed code)
+ {
+ 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
+ 'info_dict': {
+ 'id': '1_a52wc67y',
+ 'ext': 'flv',
+ 'upload_date': '20150127',
+ 'uploader_id': 'PremierMedia',
+ 'timestamp': int,
+ 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
+ },
+ },
+ # Kaltura embed with single quotes
+ {
+ 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
+ 'info_dict': {
+ 'id': '0_izeg5utt',
+ 'ext': 'mp4',
+ 'title': '35871',
+ 'timestamp': 1355743100,
+ 'upload_date': '20121217',
+ 'uploader_id': 'cplapp@learn360.com',
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Kaltura embedded via quoted entry_id
+ 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
+ 'info_dict': {
+ 'id': '0_utuok90b',
+ 'ext': 'mp4',
+ 'title': '06_matthew_brender_raj_dutt',
+ 'timestamp': 1466638791,
+ 'upload_date': '20160622',
+ },
+ 'add_ie': ['Kaltura'],
+ 'expected_warnings': [
+ 'Could not send HEAD request'
+ ],
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ {
+ # Kaltura embedded, some fileExt broken (#11480)
+ 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
+ 'info_dict': {
+ 'id': '1_sgtvehim',
+ 'ext': 'mp4',
+ 'title': 'Our "Standard Models" of particle physics and cosmology',
+ 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
+ 'timestamp': 1321158993,
+ 'upload_date': '20111113',
+ 'uploader_id': 'kps1',
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Kaltura iframe embed
+ 'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
+ 'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
+ 'info_dict': {
+ 'id': '0_f2cfbpwy',
+ 'ext': 'mp4',
+ 'title': 'I. M. Pei: A Centennial Celebration',
+ 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
+ 'upload_date': '20170403',
+ 'uploader_id': 'batchUser',
+ 'timestamp': 1491232186,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Kaltura iframe embed, more sophisticated
+ 'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html',
+ 'info_dict': {
+ 'id': '1_9gzouybz',
+ 'ext': 'mp4',
+ 'title': 'lecture-05sep2017',
+ 'description': 'md5:40f347d91fd4ba047e511c5321064b49',
+ 'upload_date': '20170913',
+ 'uploader_id': 'eps2',
+ 'timestamp': 1505340777,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # meta twitter:player
+ 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
+ 'info_dict': {
+ 'id': '0_01b42zps',
+ 'ext': 'mp4',
+ 'title': 'Main Twerk (Video)',
+ 'upload_date': '20171208',
+ 'uploader_id': 'sebastian.salinas@thechive.com',
+ 'timestamp': 1512713057,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ # referrer protected EaglePlatform embed
+ {
+ 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
+ 'info_dict': {
+ 'id': '582306',
+ 'ext': 'mp4',
+ 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 3382,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # ClipYou (EaglePlatform) embed (custom URL)
+ {
+ 'url': 'http://muz-tv.ru/play/7129/',
+ # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
+ 'info_dict': {
+ 'id': '12820',
+ 'ext': 'mp4',
+ 'title': "'O Sole Mio",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 216,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable.',
+ },
+ # Pladform embed
+ {
+ 'url': 'http://muz-tv.ru/kinozal/view/7400/',
+ 'info_dict': {
+ 'id': '100183293',
+ 'ext': 'mp4',
+ 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
+ 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 694,
+ 'age_limit': 0,
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ },
+ # Playwire embed
+ {
+ 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
+ 'info_dict': {
+ 'id': '3519514',
+ 'ext': 'mp4',
+ 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'duration': 45.115,
+ },
+ },
+ # 5min embed
+ {
+ 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
+ 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
+ 'info_dict': {
+ 'id': '518726732',
+ 'ext': 'mp4',
+ 'title': 'Facebook Creates "On This Day" | Crunch Report',
+ 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
+ 'timestamp': 1427237531,
+ 'uploader': 'Crunch Report',
+ 'upload_date': '20150324',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ # Crooks and Liars embed
+ {
+ 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
+ 'info_dict': {
+ 'id': '8RUoRhRi',
+ 'ext': 'mp4',
+ 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
+ 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+ 'timestamp': 1428207000,
+ 'upload_date': '20150405',
+ 'uploader': 'Heather',
+ },
+ },
+ # Crooks and Liars external embed
+ {
+ 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
+ 'info_dict': {
+ 'id': 'MTE3MjUtMzQ2MzA',
+ 'ext': 'mp4',
+ 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
+ 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
+ 'timestamp': 1265032391,
+ 'upload_date': '20100201',
+ 'uploader': 'Heather',
+ },
+ },
+ # NBC Sports vplayer embed
+ {
+ 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
+ 'info_dict': {
+ 'id': 'ln7x1qSThw4k',
+ 'ext': 'flv',
+ 'title': "PFT Live: New leader in the 'new-look' defense",
+ 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+ 'uploader': 'NBCU-SPORTS',
+ 'upload_date': '20140107',
+ 'timestamp': 1389118457,
+ },
+ 'skip': 'Invalid Page URL',
+ },
+ # NBC News embed
+ {
+ 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
+ 'md5': '1aa589c675898ae6d37a17913cf68d66',
+ 'info_dict': {
+ 'id': 'x_dtl_oa_LettermanliftPR_160608',
+ 'ext': 'mp4',
+ 'title': 'David Letterman: A Preview',
+ 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
+ 'upload_date': '20160609',
+ 'timestamp': 1465431544,
+ 'uploader': 'NBCU-NEWS',
+ },
+ },
+ # UDN embed
+ {
+ 'url': 'https://video.udn.com/news/300346',
+ 'md5': 'fd2060e988c326991037b9aff9df21a6',
+ 'info_dict': {
+ 'id': '300346',
+ 'ext': 'mp4',
+ 'title': '中一中男師變性 全校師生力挺',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to parse JSON Expecting value'],
+ },
+ # Brightcove URL in single quotes
+ {
+ 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
+ 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
+ 'info_dict': {
+ 'id': '4255764656001',
+ 'ext': 'mp4',
+ 'title': 'SN Presents: Russell Martin, World Citizen',
+ 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
+ 'uploader': 'Rogers Sportsnet',
+ 'uploader_id': '1704050871',
+ 'upload_date': '20150525',
+ 'timestamp': 1432570283,
+ },
+ },
+ # Kinja embed
+ {
+ 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
+ 'info_dict': {
+ 'id': '106351',
+ 'ext': 'mp4',
+ 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
+ 'description': 'Migrated from OnionStudios',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'uploader': 'clickhole',
+ 'upload_date': '20150527',
+ 'timestamp': 1432744860,
+ }
+ },
+ # SnagFilms embed
+ {
+ 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
+ 'info_dict': {
+ 'id': '74849a00-85a9-11e1-9660-123139220831',
+ 'ext': 'mp4',
+ 'title': '#whilewewatch',
+ }
+ },
+ # AdobeTVVideo embed
+ {
+ 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
+ 'md5': '43662b577c018ad707a63766462b1e87',
+ 'info_dict': {
+ 'id': '2456',
+ 'ext': 'mp4',
+ 'title': 'New experience with Acrobat DC',
+ 'description': 'New experience with Acrobat DC',
+ 'duration': 248.667,
+ },
+ },
+ # BrightcoveInPageEmbed embed
+ {
+ 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
+ 'info_dict': {
+ 'id': '4238694884001',
+ 'ext': 'flv',
+ 'title': 'Tabletop: Dread, Last Thoughts',
+ 'description': 'Tabletop: Dread, Last Thoughts',
+ 'duration': 51690,
+ },
+ },
+ # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
+ # This video can't be played in browsers if Flash is disabled and the UA is set to iPhone, which is actually a false alarm
+ {
+ 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
+ 'info_dict': {
+ 'id': '4785848093001',
+ 'ext': 'mp4',
+ 'title': 'The Cardinal Pell Interview',
+ 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
+ 'uploader': 'GlobeCast Australia - GlobeStream',
+ 'uploader_id': '2733773828001',
+ 'upload_date': '20160304',
+ 'timestamp': 1457083087,
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ },
+ {
+ # Brightcove embed with whitespace around attribute names
+ 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
+ 'info_dict': {
+ 'id': '3167554373001',
+ 'ext': 'mp4',
+ 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
+ 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
+ 'uploader_id': '1079349493',
+ 'upload_date': '20140207',
+ 'timestamp': 1391810548,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # Another form of arte.tv embed
+ {
+ 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
+ 'md5': '850bfe45417ddf221288c88a0cffe2e2',
+ 'info_dict': {
+ 'id': '030273-562_PLUS7-F',
+ 'ext': 'mp4',
+ 'title': 'ARTE Reportage - Nulle part, en France',
+ 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
+ 'upload_date': '20160409',
+ },
+ },
+ # LiveLeak embed
+ {
+ 'url': 'http://www.wykop.pl/link/3088787/',
+ 'md5': '7619da8c820e835bef21a1efa2a0fc71',
+ 'info_dict': {
+ 'id': '874_1459135191',
+ 'ext': 'mp4',
+ 'title': 'Man shows poor quality of new apartment building',
+ 'description': 'The wall is like a sand pile.',
+ 'uploader': 'Lake8737',
+ },
+ 'add_ie': [LiveLeakIE.ie_key()],
+ },
+ # Another LiveLeak embed pattern (#13336)
+ {
+ 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
+ 'info_dict': {
+ 'id': '2eb_1496309988',
+ 'ext': 'mp4',
+ 'title': 'Thief robs place where everyone was armed',
+ 'description': 'md5:694d73ee79e535953cf2488562288eee',
+ 'uploader': 'brazilwtf',
+ },
+ 'add_ie': [LiveLeakIE.ie_key()],
+ },
+ # Duplicated embedded video URLs
+ {
+ 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
+ 'info_dict': {
+ 'id': '149298443_480_16c25b74_2',
+ 'ext': 'mp4',
+ 'title': 'vs. Blue Orange Spring Game',
+ 'uploader': 'www.hudl.com',
+ },
+ },
+ # twitter:player:stream embed
+ {
+ 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
+ 'info_dict': {
+ 'id': 'master',
+ 'ext': 'mp4',
+ 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
+ 'uploader': 'www.rtl.be',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ },
+ # twitter:player embed
+ {
+ 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
+ 'md5': 'a3e0df96369831de324f0778e126653c',
+ 'info_dict': {
+ 'id': '4909620399001',
+ 'ext': 'mp4',
+ 'title': 'What Do Black Holes Sound Like?',
+ 'description': 'what do black holes sound like',
+ 'upload_date': '20160524',
+ 'uploader_id': '29913724001',
+ 'timestamp': 1464107587,
+ 'uploader': 'TheAtlantic',
+ },
+ 'add_ie': ['BrightcoveLegacy'],
+ },
+ # Facebook <iframe> embed
+ {
+ 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
+ 'md5': 'fbcde74f534176ecb015849146dd3aee',
+ 'info_dict': {
+ 'id': '599637780109885',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #599637780109885',
+ },
+ },
+ # Facebook <iframe> embed, plugin video
+ {
+ 'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
+ 'info_dict': {
+ 'id': '1754168231264132',
+ 'ext': 'mp4',
+ 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
+ 'uploader': 'Tariq Ramadan (official)',
+ 'timestamp': 1496758379,
+ 'upload_date': '20170606',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # Facebook API embed
+ {
+ 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
+ 'md5': 'a47372ee61b39a7b90287094d447d94e',
+ 'info_dict': {
+ 'id': '10153467542406923',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #10153467542406923',
+ },
+ },
+ # WordPress "YouTube Video Importer" plugin
+ {
+ 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
+ 'md5': 'd16797741b560b485194eddda8121b48',
+ 'info_dict': {
+ 'id': 'HNTXWDXV9Is',
+ 'ext': 'mp4',
+ 'title': 'Blue Devils Drumline Stanford lot 2016',
+ 'upload_date': '20160627',
+ 'uploader_id': 'GENOCIDE8GENERAL10',
+ 'uploader': 'cylus cyrus',
+ },
+ },
+ {
+ # video stored on custom kaltura server
+ 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
+ 'md5': '537617d06e64dfed891fa1593c4b30cc',
+ 'info_dict': {
+ 'id': '0_1iotm5bh',
+ 'ext': 'mp4',
+ 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
+ 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
+ 'uploader_id': 'videos.expansion@el-mundo.net',
+ 'upload_date': '20150429',
+ 'timestamp': 1430303472,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # multiple kaltura embeds, nsfw
+ 'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html',
+ 'info_dict': {
+ 'id': 'kamila-avec-video-jaime-sadomie',
+ 'title': "Kamila avec vídeo “J'aime sadomie”",
+ },
+ 'playlist_count': 8,
+ },
+ {
+ # Non-standard Vimeo embed
+ 'url': 'https://openclassrooms.com/courses/understanding-the-web',
+ 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
+ 'info_dict': {
+ 'id': '148867247',
+ 'ext': 'mp4',
+ 'title': 'Understanding the web - Teaser',
+ 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
+ 'upload_date': '20151214',
+ 'uploader': 'OpenClassrooms',
+ 'uploader_id': 'openclassrooms',
+ },
+ 'add_ie': ['Vimeo'],
+ },
+ {
+ # generic vimeo embed that requires original URL passed as Referer
+ 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
+ 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
+ 'info_dict': {
+ 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny',
+ 'description': 'Royalty free test video',
+ 'timestamp': 1432816365,
+ 'upload_date': '20150528',
+ 'is_live': False,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [ArkenaIE.ie_key()],
+ },
+ {
+ 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
+ 'info_dict': {
+ 'id': '1c7141f46c',
+ 'ext': 'mp4',
+ 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [Vbox7IE.ie_key()],
+ },
+ {
+ # DBTV embeds
+ 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
+ 'info_dict': {
+ 'id': '43254897',
+ 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
+ },
+ 'playlist_mincount': 3,
+ },
+ {
+ # Videa embeds
+ 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
+ 'info_dict': {
+ 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
+ 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
+ },
+ 'playlist_mincount': 2,
+ },
+ {
+ # 20 minuten embed
+ 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
+ 'info_dict': {
+ 'id': '523629',
+ 'ext': 'mp4',
+ 'title': 'So kommen Sie bei Eis und Schnee sicher an',
+ 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [TwentyMinutenIE.ie_key()],
+ },
+ {
+ # VideoPress embed
+ 'url': 'https://en.support.wordpress.com/videopress/',
+ 'info_dict': {
+ 'id': 'OcobLTqC',
+ 'ext': 'm4v',
+ 'title': 'IMG_5786',
+ 'timestamp': 1435711927,
+ 'upload_date': '20150701',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [VideoPressIE.ie_key()],
+ },
+ {
+ # Rutube embed
+ 'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
+ 'info_dict': {
+ 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
+ 'ext': 'flv',
+ 'title': 'Магаззино: Казань 2',
+ 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
+ 'uploader': 'Магаззино',
+ 'upload_date': '20170228',
+ 'uploader_id': '996642',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [RutubeIE.ie_key()],
+ },
+ {
+ # ThePlatform embedded with whitespaces in URLs
+ 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
+ 'only_matching': True,
+ },
+ {
+ # Senate ISVP iframe https
+ 'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
+ 'md5': 'fb8c70b0b515e5037981a2492099aab8',
+ 'info_dict': {
+ 'id': 'govtaff020316',
+ 'ext': 'mp4',
+ 'title': 'Integrated Senate Video Player',
+ },
+ 'add_ie': [SenateISVPIE.ie_key()],
+ },
+ {
+ # Limelight embeds (1 channel embed + 4 media embeds)
+ 'url': 'http://www.sedona.com/FacilitatorTraining2017',
+ 'info_dict': {
+ 'id': 'FacilitatorTraining2017',
+ 'title': 'Facilitator Training 2017',
+ },
+ 'playlist_mincount': 5,
+ },
+ {
+ # Limelight embed (LimelightPlayerUtil.embed)
+ 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
+ 'info_dict': {
+ 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
+ 'ext': 'mp4',
+ 'title': '07448641',
+ 'timestamp': 1499890639,
+ 'upload_date': '20170712',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['LimelightMedia'],
+ },
+ {
+ 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
+ 'info_dict': {
+ 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
+ 'title': 'Standoff with Walnut Creek murder suspect ends',
+ 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
+ },
+ 'playlist_mincount': 4,
+ },
+ {
+ # WashingtonPost embed
+ 'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
+ 'info_dict': {
+ 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
+ 'ext': 'mp4',
+ 'title': "No one has seen the drama series based on Trump's life \u2014 until now",
+ 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
+ 'timestamp': 1455216756,
+ 'uploader': 'The Washington Post',
+ 'upload_date': '20160211',
+ },
+ 'add_ie': [WashingtonPostIE.ie_key()],
+ },
+ {
+ # Mediaset embed
+ 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
+ 'info_dict': {
+ 'id': '720642',
+ 'ext': 'mp4',
+ 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [MediasetIE.ie_key()],
+ },
+ {
+ # JOJ.sk embeds
+ 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
+ 'info_dict': {
+ 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
+ 'title': 'Slovenskom sa prehnala vlna silných búrok',
+ },
+ 'playlist_mincount': 5,
+ 'add_ie': [JojIE.ie_key()],
+ },
+ {
+ # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
+ 'url': 'https://tvrain.ru/amp/418921/',
+ 'md5': 'cc00413936695987e8de148b67d14f1d',
+ 'info_dict': {
+ 'id': '418921',
+ 'ext': 'mp4',
+ 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+ },
+ },
+ {
+ # vzaar embed
+ 'url': 'http://help.vzaar.com/article/165-embedding-video',
+ 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
+ 'info_dict': {
+ 'id': '8707641',
+ 'ext': 'mp4',
+ 'title': 'Building A Business Online: Principal Chairs Q & A',
+ },
+ },
+ {
+ # multiple HTML5 videos on one page
+ 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
+ 'info_dict': {
+ 'id': 'keyscenarios',
+ 'title': 'Rescue Kit 14 Free Edition - Getting started',
+ },
+ 'playlist_count': 4,
+ },
+ {
+ # vshare embed
+ 'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
+ 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
+ 'info_dict': {
+ 'id': '0f64ce6',
+ 'title': 'vl14062007715967',
+ 'ext': 'mp4',
+ },
+ },
+ {
+ 'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
+ 'md5': 'aecd089f55b1cb5a59032cb049d3a356',
+ 'info_dict': {
+ 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
+ 'ext': 'mp4',
+ 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
+ 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
+ 'timestamp': 1474354800,
+ 'upload_date': '20160920',
+ },
+ },
+ {
+ 'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
+ 'info_dict': {
+ 'id': '1731611',
+ 'ext': 'mp4',
+ 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
+ 'description': 'md5:eb5f23826a027ba95277d105f248b825',
+ 'timestamp': 1516100691,
+ 'upload_date': '20180116',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [SpringboardPlatformIE.ie_key()],
+ },
+ {
+ 'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
+ 'info_dict': {
+ 'id': 'vMDE4NzI1Mjgt690b',
+ 'ext': 'mp4',
+ 'title': 'Котята',
+ },
+ 'add_ie': [YapFilesIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # CloudflareStream embed
+ 'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
+ 'info_dict': {
+ 'id': '31c9291ab41fac05471db4e73aa11717',
+ 'ext': 'mp4',
+ 'title': '31c9291ab41fac05471db4e73aa11717',
+ },
+ 'add_ie': [CloudflareStreamIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # PeerTube embed
+ 'url': 'https://joinpeertube.org/fr/home/',
+ 'info_dict': {
+ 'id': 'home',
+ 'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
+ },
+ 'playlist_count': 2,
+ },
+ {
+ # Indavideo embed
+ 'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
+ 'info_dict': {
+ 'id': '1693903',
+ 'ext': 'mp4',
+ 'title': 'Így kell otthon hamburgert sütni',
+ 'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
+ 'timestamp': 1426330212,
+ 'upload_date': '20150314',
+ 'uploader': 'StreetKitchen',
+ 'uploader_id': '546363',
+ },
+ 'add_ie': [IndavideoEmbedIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # APA embed via JWPlatform embed
+ 'url': 'http://www.vol.at/blue-man-group/5593454',
+ 'info_dict': {
+ 'id': 'jjv85FdZ',
+ 'ext': 'mp4',
+ 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 254,
+ 'timestamp': 1519211149,
+ 'upload_date': '20180221',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://share-videos.se/auto/video/83645793?uid=13',
+ 'md5': 'b68d276de422ab07ee1d49388103f457',
+ 'info_dict': {
+ 'id': '83645793',
+ 'title': 'Lock up and get excited',
+ 'ext': 'mp4'
+ },
+ 'skip': 'TODO: fix nested playlists processing in tests',
+ },
+ {
+ # Viqeo embeds
+ 'url': 'https://viqeo.tv/',
+ 'info_dict': {
+ 'id': 'viqeo',
+ 'title': 'All-new video platform',
+ },
+ 'playlist_count': 6,
+ },
+ {
+ # Squarespace video embed, 2019-08-28
+ 'url': 'http://ootboxford.com',
+ 'info_dict': {
+ 'id': 'Tc7b_JGdZfw',
+ 'title': 'Out of the Blue, at Childish Things 10',
+ 'ext': 'mp4',
+ 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
+ 'uploader_id': 'helendouglashouse',
+ 'uploader': 'Helen & Douglas House',
+ 'upload_date': '20140328',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # {
+ # # Zype embed
+ # 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
+ # 'info_dict': {
+ # 'id': '5b400b834b32992a310622b9',
+ # 'ext': 'mp4',
+ # 'title': 'Smoky Barbecue Favorites',
+ # 'thumbnail': r're:^https?://.*\.jpe?g',
+ # 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
+ # 'upload_date': '20170909',
+ # 'timestamp': 1504915200,
+ # },
+ # 'add_ie': [ZypeIE.ie_key()],
+ # 'params': {
+ # 'skip_download': True,
+ # },
+ # },
+ {
+ # videojs embed
+ 'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
+ 'info_dict': {
+ 'id': 'shell',
+ 'ext': 'mp4',
+ 'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
+ 'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download MPD manifest'],
+ },
+ {
+ # DailyMotion embed with DM.player
+ 'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804',
+ 'info_dict': {
+ 'id': 'k6aKkGHd9FJs4mtJN39',
+ 'ext': 'mp4',
+ 'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final',
+ 'description': 'This video is private.',
+ 'uploader_id': 'x1jf30l',
+ 'uploader': 'beIN SPORTS USA',
+ 'upload_date': '20190528',
+ 'timestamp': 1559062971,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # {
+ # # TODO: find another test
+ # # http://schema.org/VideoObject
+ # 'url': 'https://flipagram.com/f/nyvTSJMKId',
+ # 'md5': '888dcf08b7ea671381f00fab74692755',
+ # 'info_dict': {
+ # 'id': 'nyvTSJMKId',
+ # 'ext': 'mp4',
+ # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
+ # 'description': '#love for cats.',
+ # 'timestamp': 1461244995,
+ # 'upload_date': '20160421',
+ # },
+ # 'params': {
+ # 'force_generic_extractor': True,
+ # },
+ # },
+ {
+ # VHX Embed
+ 'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy',
+ 'info_dict': {
+ 'id': '858208',
+ 'ext': 'mp4',
+ 'title': 'Untitled',
+ 'uploader_id': 'user80538407',
+ 'uploader': 'OTT Videos',
+ },
+ },
+ {
+ # ArcPublishing PoWa video player
+ 'url': 'https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/',
+ 'md5': 'b03b2fac8680e1e5a7cc81a5c27e71b3',
+ 'info_dict': {
+ 'id': '8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
+ 'ext': 'mp4',
+ 'title': 'Senate candidates wave to voters on Anchorage streets',
+ 'description': 'md5:91f51a6511f090617353dc720318b20e',
+ 'timestamp': 1604378735,
+ 'upload_date': '20201103',
+ 'duration': 1581,
+ },
+ },
+ {
+ # MyChannels SDK embed
+ # https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
+ 'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
+ 'md5': '90c0699c37006ef18e198c032d81739c',
+ 'info_dict': {
+ 'id': '194165',
+ 'ext': 'mp4',
+ 'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
+ 'timestamp': 1611740340,
+ 'upload_date': '20210127',
+ 'duration': 159,
+ },
+ },
+ {
+ # Simplecast player embed
+ 'url': 'https://www.bio.org/podcast',
+ 'info_dict': {
+ 'id': 'podcast',
+ 'title': 'I AM BIO Podcast | BIO',
+ },
+ 'playlist_mincount': 52,
+ },
+ {
+ # Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
+ 'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
+ 'only_matching': True,
+ },
+ ]
+
+ def report_following_redirect(self, new_url):
+ """Report information extraction."""
+ self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
+
+ def _extract_rss(self, url, video_id, doc):
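+ """Build a playlist from an RSS feed, one url_transparent entry per item."""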
+ playlist_title = doc.find('./channel/title').text
+ playlist_desc_el = doc.find('./channel/description')
+ playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
+
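+ # iTunes podcast tags live in their own XML namespace.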
+ NS_MAP = {
+ 'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
+ }
+
+ entries = []
+ for it in doc.findall('./channel/item'):
+ next_url = None
+ enclosure_nodes = it.findall('./enclosure')
+ for e in enclosure_nodes:
+ next_url = e.attrib.get('url')
+ if next_url:
+ break
+
+ if not next_url:
+ next_url = xpath_text(it, 'link', fatal=False)
+
+ if not next_url:
+ continue
+
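+ # Read the text of an <itunes:KEY> child of this item, or None if absent.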
+ def itunes(key):
+ return xpath_text(
+ it, xpath_with_ns('./itunes:%s' % key, NS_MAP),
+ default=None)
+
+ duration = itunes('duration')
+ explicit = (itunes('explicit') or '').lower()
+ if explicit in ('true', 'yes'):
+ age_limit = 18
+ elif explicit in ('false', 'no'):
+ age_limit = 0
+ else:
+ age_limit = None
+
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': next_url,
+ 'title': it.find('title').text,
+ 'description': xpath_text(it, 'description', default=None),
+ 'timestamp': unified_timestamp(
+ xpath_text(it, 'pubDate', default=None)),
+ 'duration': int_or_none(duration) or parse_duration(duration),
+ 'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
+ 'episode': itunes('title'),
+ 'episode_number': int_or_none(itunes('episode')),
+ 'season_number': int_or_none(itunes('season')),
+ 'age_limit': age_limit,
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': url,
+ 'title': playlist_title,
+ 'description': playlist_desc,
+ 'entries': entries,
+ }
+
+ def _extract_camtasia(self, url, video_id, webpage):
+ """ Returns None if no camtasia video can be found. """
+
+ camtasia_cfg = self._search_regex(
+ r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
+ webpage, 'camtasia configuration file', default=None)
+ if camtasia_cfg is None:
+ return None
+
+ title = self._html_search_meta('DC.title', webpage, fatal=True)
+
+ camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
+ camtasia_cfg = self._download_xml(
+ camtasia_url, video_id,
+ note='Downloading camtasia configuration',
+ errnote='Failed to download camtasia configuration')
+ fileset_node = camtasia_cfg.find('./playlist/array/fileset')
+
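+ # Each child of the fileset describes one clip of the project; emit one
+ # playlist entry per clip.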
+ entries = []
+ # Element.getchildren() was deprecated in Python 3.2 and removed in 3.9;
+ # iterating over the element directly yields the same children.
+ for n in fileset_node:
+ url_n = n.find('./uri')
+ if url_n is None:
+ continue
+
+ entries.append({
+ 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
+ 'title': '%s - %s' % (title, n.tag),
+ 'url': compat_urlparse.urljoin(url, url_n.text),
+ 'duration': float_or_none(n.find('./duration').text),
+ })
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'title': title,
+ }
+
+ def _real_extract(self, url):
+ if url.startswith('//'):
+ return self.url_result(self.http_scheme() + url)
+
+ parsed_url = compat_urlparse.urlparse(url)
+ if not parsed_url.scheme:
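+ # No scheme given: interpret the input according to --default-search.
+ # 'auto' and 'auto_warning' try plain http:// for domain-like strings and
+ # otherwise fall back to a YouTube search; 'fixup_error' tries http:// and
+ # otherwise errors out; 'error' always errors out; any other value is used
+ # verbatim as a search prefix.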
+ default_search = self._downloader.params.get('default_search')
+ if default_search is None:
+ default_search = 'fixup_error'
+
+ if default_search in ('auto', 'auto_warning', 'fixup_error'):
+ if re.match(r'^[^\s/]+\.[^\s/]+/', url):
+ self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
+ return self.url_result('http://' + url)
+ elif default_search != 'fixup_error':
+ if default_search == 'auto_warning':
+ if re.match(r'^(?:url|URL)$', url):
+ raise ExtractorError(
+ 'Invalid URL: %r . Call hypervideo like this: hypervideo -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
+ expected=True)
+ else:
+ self._downloader.report_warning(
+ 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
+ return self.url_result('ytsearch:' + url)
+
+ if default_search in ('error', 'fixup_error'):
+ raise ExtractorError(
+ '%r is not a valid URL. '
+ 'Set --default-search "ytsearch" (or run hypervideo "ytsearch:%s" ) to search YouTube'
+ % (url, url), expected=True)
+ else:
+ if ':' not in default_search:
+ default_search += ':'
+ return self.url_result(default_search + url)
+
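+ # Callers may smuggle extra data into the URL, e.g. a forced video id or a
+ # 'to_generic' flag marking that this extractor was chosen on purpose.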
+ url, smuggled_data = unsmuggle_url(url)
+ force_videoid = None
+ is_intentional = smuggled_data and smuggled_data.get('to_generic')
+ if smuggled_data and 'force_videoid' in smuggled_data:
+ force_videoid = smuggled_data['force_videoid']
+ video_id = force_videoid
+ else:
+ video_id = self._generic_id(url)
+
+ self.to_screen('%s: Requesting header' % video_id)
+
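+ # A HEAD request is enough to follow redirects and to detect direct media
+ # links from Content-Type without downloading the response body.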
+ head_req = HEADRequest(url)
+ head_response = self._request_webpage(
+ head_req, video_id,
+ note=False, errnote='Could not send HEAD request to %s' % url,
+ fatal=False)
+
+ if head_response is not False:
+ # Check for redirect
+ new_url = head_response.geturl()
+ if url != new_url:
+ self.report_following_redirect(new_url)
+ if force_videoid:
+ new_url = smuggle_url(
+ new_url, {'force_videoid': force_videoid})
+ return self.url_result(new_url)
+
+ full_response = None
+ if head_response is False:
+ request = sanitized_Request(url)
+ request.add_header('Accept-Encoding', '*')
+ full_response = self._request_webpage(request, video_id)
+ head_response = full_response
+
+ info_dict = {
+ 'id': video_id,
+ 'title': self._generic_title(url),
+ 'timestamp': unified_timestamp(head_response.headers.get('Last-Modified'))
+ }
+
+ # Check for direct link to a video
+ content_type = head_response.headers.get('Content-Type', '').lower()
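+ # audio/* and video/* always indicate direct media; application/* counts
+ # only for ogg and the mpegurl variants (hence the lookahead in the regex).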
+ m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
+ if m:
+ format_id = compat_str(m.group('format_id'))
+ if format_id.endswith('mpegurl'):
+ formats = self._extract_m3u8_formats(url, video_id, 'mp4')
+ elif format_id == 'f4m':
+ formats = self._extract_f4m_formats(url, video_id)
+ else:
+ formats = [{
+ 'format_id': format_id,
+ 'url': url,
+ 'vcodec': 'none' if m.group('type') == 'audio' else None
+ }]
+ info_dict['direct'] = True
+ self._sort_formats(formats)
+ info_dict['formats'] = formats
+ return info_dict
+
+ if not self._downloader.params.get('test', False) and not is_intentional:
+ force = self._downloader.params.get('force_generic_extractor', False)
+ self._downloader.report_warning(
+ '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
+
+ if not full_response:
+ request = sanitized_Request(url)
+ # Some web servers serve compressed content of considerable size (e.g. gzipped
+ # FLAC), which makes it impossible to download only a chunk of the file (and
+ # we need just the first 512 bytes to test whether it is HTML or not).
+ # With hypervideo's default Accept-Encoding that would always result in
+ # downloading the whole file, which is not desirable. Therefore, for the
+ # extraction pass we override Accept-Encoding to '*' so that raw bytes are
+ # accepted and only a chunk needs to be downloaded.
+ # It might be better to solve this by checking Content-Type for
+ # application/octet-stream after the HEAD request finishes, but it is not
+ # clear whether that can be relied on.
+ request.add_header('Accept-Encoding', '*')
+ full_response = self._request_webpage(request, video_id)
+
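+ # 512 bytes are enough to recognise an M3U header or HTML markup without
+ # fetching the whole resource.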
+ first_bytes = full_response.read(512)
+
+ # Is it an M3U playlist?
+ if first_bytes.startswith(b'#EXTM3U'):
+ info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
+ self._sort_formats(info_dict['formats'])
+ return info_dict
+
+ # Maybe it's a direct link to a video?
+ # Be careful not to download the whole thing!
+ if not is_html(first_bytes):
+ self._downloader.report_warning(
+ 'URL could be a direct video link, returning it as such.')
+ info_dict.update({
+ 'direct': True,
+ 'url': url,
+ })
+ return info_dict
+
+ webpage = self._webpage_read_content(
+ full_response, url, video_id, prefix=first_bytes)
+
+ if '<title>DPG Media Privacy Gate</title>' in webpage:
+ webpage = self._download_webpage(url, video_id)
+
+ self.report_extraction(video_id)
+
+ # Is it an RSS feed, a SMIL file, an XSPF playlist, or an ISM, MPD or f4m manifest?
+ try:
+ doc = compat_etree_fromstring(webpage.encode('utf-8'))
+ if doc.tag == 'rss':
+ return self._extract_rss(url, video_id, doc)
+ elif doc.tag == 'SmoothStreamingMedia':
+ info_dict['formats'] = self._parse_ism_formats(doc, url)
+ self._sort_formats(info_dict['formats'])
+ return info_dict
+ elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
+ smil = self._parse_smil(doc, url, video_id)
+ self._sort_formats(smil['formats'])
+ return smil
+ elif doc.tag == '{http://xspf.org/ns/0/}playlist':
+ return self.playlist_result(
+ self._parse_xspf(
+ doc, video_id, xspf_url=url,
+ xspf_base_url=full_response.geturl()),
+ video_id)
+ elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
+ info_dict['formats'] = self._parse_mpd_formats(
+ doc,
+ mpd_base_url=full_response.geturl().rpartition('/')[0],
+ mpd_url=url)
+ self._sort_formats(info_dict['formats'])
+ return info_dict
+ elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
+ info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
+ self._sort_formats(info_dict['formats'])
+ return info_dict
+ except compat_xml_parse_error:
+ pass
+
+ # Is it a Camtasia project?
+ camtasia_res = self._extract_camtasia(url, video_id, webpage)
+ if camtasia_res is not None:
+ return camtasia_res
+
+ # Sometimes the embedded video player is hidden behind percent encoding
+ # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448).
+ # Unescaping the whole page would handle those cases in a generic way.
+ # FIXME: unescaping the whole page may break URLs, so it is commented out for now.
+ # There should probably be a second run of the generic extractor on the unescaped webpage.
+ # webpage = compat_urllib_parse_unquote(webpage)
+
+ # Unescape squarespace embeds to be detected by generic extractor,
+ # see https://github.com/ytdl-org/youtube-dl/issues/21294
+ webpage = re.sub(
+ r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
+ lambda x: unescapeHTML(x.group(0)), webpage)
+
+ # It's tempting to parse this further, but you would
+ # have to take into account all the variations like
+ # Video Title - Site Name
+ # Site Name | Video Title
+ # Video Title - Tagline | Site Name
+ # and so on and so forth; it's just not practical.
+ video_title = self._og_search_title(
+ webpage, default=None) or self._html_search_regex(
+ r'(?s)<title>(.*?)</title>', webpage, 'video title',
+ default='video')
+
+ # Try to detect age limit automatically
+ age_limit = self._rta_search(webpage)
+ # And then there are the jokers who advertise that they use RTA,
+ # but actually don't.
+ AGE_LIMIT_MARKERS = [
+ r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
+ ]
+ if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
+ age_limit = 18
+
+ # video uploader is domain name
+ video_uploader = self._search_regex(
+ r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
+
+ video_description = self._og_search_description(webpage, default=None)
+ video_thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+ info_dict.update({
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'age_limit': age_limit,
+ })
+
+ # Look for Brightcove Legacy Studio embeds
+ bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
+ if bc_urls:
+ entries = [{
+ '_type': 'url',
+ 'url': smuggle_url(bc_url, {'Referer': url}),
+ 'ie_key': 'BrightcoveLegacy'
+ } for bc_url in bc_urls]
+
+ return {
+ '_type': 'playlist',
+ 'title': video_title,
+ 'id': video_id,
+ 'entries': entries,
+ }
+
+ # Look for Brightcove New Studio embeds
+ bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
+ if bc_urls:
+ return self.playlist_from_matches(
+ bc_urls, video_id, video_title,
+ getter=lambda x: smuggle_url(x, {'referrer': url}),
+ ie='BrightcoveNew')
+
+ # Look for Nexx embeds
+ nexx_urls = NexxIE._extract_urls(webpage)
+ if nexx_urls:
+ return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
+
+ # Look for Nexx iFrame embeds
+ nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
+ if nexx_embed_urls:
+ return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
+
+ # Look for ThePlatform embeds
+ tp_urls = ThePlatformIE._extract_urls(webpage)
+ if tp_urls:
+ return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
+
+ arc_urls = ArcPublishingIE._extract_urls(webpage)
+ if arc_urls:
+ return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
+
+ mychannels_urls = MedialaanIE._extract_urls(webpage)
+ if mychannels_urls:
+ return self.playlist_from_matches(
+ mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key())
+
+ # Look for embedded rtl.nl player
+ matches = re.findall(
+ r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
+ webpage)
+ if matches:
+ return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
+
+ vimeo_urls = VimeoIE._extract_urls(url, webpage)
+ if vimeo_urls:
+ return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
+
+ vhx_url = VHXEmbedIE._extract_url(webpage)
+ if vhx_url:
+ return self.url_result(vhx_url, VHXEmbedIE.ie_key())
+
+ vid_me_embed_url = self._search_regex(
+ r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
+ webpage, 'vid.me embed', default=None)
+ if vid_me_embed_url is not None:
+ return self.url_result(vid_me_embed_url, 'Vidme')
+
+ # Look for YouTube embeds
+ youtube_urls = YoutubeIE._extract_urls(webpage)
+ if youtube_urls:
+ return self.playlist_from_matches(
+ youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
+
+ matches = DailymotionIE._extract_urls(webpage)
+ if matches:
+ return self.playlist_from_matches(matches, video_id, video_title)
+
+ # Look for embedded Dailymotion playlist player (#3822)
+ m = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
+ if m:
+ playlists = re.findall(
+ r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
+ if playlists:
+ return self.playlist_from_matches(
+ playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
+
+ # Look for DailyMail embeds
+ dailymail_urls = DailyMailIE._extract_urls(webpage)
+ if dailymail_urls:
+ return self.playlist_from_matches(
+ dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
+
+ # Look for Teachable embeds, must be before Wistia
+ teachable_url = TeachableIE._extract_url(webpage, url)
+ if teachable_url:
+ return self.url_result(teachable_url)
+
+ # Look for embedded Wistia player
+ wistia_urls = WistiaIE._extract_urls(webpage)
+ if wistia_urls:
+ playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
+ for entry in playlist['entries']:
+ entry.update({
+ '_type': 'url_transparent',
+ 'uploader': video_uploader,
+ })
+ return playlist
+
+ # Look for SVT player
+ svt_url = SVTIE._extract_url(webpage)
+ if svt_url:
+ return self.url_result(svt_url, 'SVT')
+
+ # Look for Bandcamp pages with custom domain
+ mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
+ if mobj is not None:
+ burl = unescapeHTML(mobj.group(1))
+ # Don't set the extractor because it can be a track url or an album
+ return self.url_result(burl)
+
+ # Look for embedded Vevo player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for embedded Viddler player
+ mobj = re.search(
+ r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for NYTimes player
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for Libsyn player
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for Ooyala videos
+ mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage)
+ or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+ or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+ or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)
+ or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+ if mobj is not None:
+ embed_token = self._search_regex(
+ r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
+ webpage, 'ooyala embed token', default=None)
+ return OoyalaIE._build_url_result(smuggle_url(
+ mobj.group('ec'), {
+ 'domain': url,
+ 'embed_token': embed_token,
+ }))
+
+ # Look for multiple Ooyala embeds on SBN network websites
+ mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
+ if mobj is not None:
+ embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
+ if embeds:
+ return self.playlist_from_matches(
+ embeds, video_id, video_title,
+ getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
+
+ # Look for Aparat videos
+ mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group(1), 'Aparat')
+
+ # Look for MPORA videos
+ mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group(1), 'Mpora')
+
+ # Look for embedded Facebook player
+ facebook_urls = FacebookIE._extract_urls(webpage)
+ if facebook_urls:
+ return self.playlist_from_matches(facebook_urls, video_id, video_title)
+
+ # Look for embedded VK player
+ mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'VK')
+
+ # Look for embedded Odnoklassniki player
+ odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage)
+ if odnoklassniki_url:
+ return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
+
+ # Look for sibnet embedded player
+ sibnet_urls = VKIE._extract_sibnet_urls(webpage)
+ if sibnet_urls:
+ return self.playlist_from_matches(sibnet_urls, video_id, video_title)
+
+ # Look for embedded ivi player
+ mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Ivi')
+
+ # Look for embedded Huffington Post player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'HuffPost')
+
+ # Look for embed.ly
+ mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+ mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
+ if mobj is not None:
+ return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
+
+ # Look for funnyordie embed
+ matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
+ if matches:
+ return self.playlist_from_matches(
+ matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
+
+ # Look for Simplecast embeds
+ simplecast_urls = SimplecastIE._extract_urls(webpage)
+ if simplecast_urls:
+ return self.playlist_from_matches(
+ simplecast_urls, video_id, video_title)
+
+ # Look for BBC iPlayer embed
+ matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
+ if matches:
+ return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
+
+ # Look for embedded RUTV player
+ rutv_url = RUTVIE._extract_url(webpage)
+ if rutv_url:
+ return self.url_result(rutv_url, 'RUTV')
+
+ # Look for embedded TVC player
+ tvc_url = TVCIE._extract_url(webpage)
+ if tvc_url:
+ return self.url_result(tvc_url, 'TVC')
+
+ # Look for embedded SportBox player
+ sportbox_urls = SportBoxIE._extract_urls(webpage)
+ if sportbox_urls:
+ return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
+
+ # Look for embedded XHamster player
+ xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
+ if xhamster_urls:
+ return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
+
+ # Look for embedded TNAFlixNetwork player
+ tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
+ if tnaflix_urls:
+ return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
+
+ # Look for embedded PornHub player
+ pornhub_urls = PornHubIE._extract_urls(webpage)
+ if pornhub_urls:
+ return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
+
+ # Look for embedded DrTuber player
+ drtuber_urls = DrTuberIE._extract_urls(webpage)
+ if drtuber_urls:
+ return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
+
+ # Look for embedded RedTube player
+ redtube_urls = RedTubeIE._extract_urls(webpage)
+ if redtube_urls:
+ return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
+
+ # Look for embedded Tube8 player
+ tube8_urls = Tube8IE._extract_urls(webpage)
+ if tube8_urls:
+ return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
+
+ # Look for embedded Mofosex player
+ mofosex_urls = MofosexEmbedIE._extract_urls(webpage)
+ if mofosex_urls:
+ return self.playlist_from_matches(mofosex_urls, video_id, video_title, ie=MofosexEmbedIE.ie_key())
+
+ # Look for embedded Spankwire player
+ spankwire_urls = SpankwireIE._extract_urls(webpage)
+ if spankwire_urls:
+ return self.playlist_from_matches(spankwire_urls, video_id, video_title, ie=SpankwireIE.ie_key())
+
+ # Look for embedded YouPorn player
+ youporn_urls = YouPornIE._extract_urls(webpage)
+ if youporn_urls:
+ return self.playlist_from_matches(youporn_urls, video_id, video_title, ie=YouPornIE.ie_key())
+
+ # Look for embedded Tvigle player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Tvigle')
+
+ # Look for embedded TED player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'TED')
+
+ # Look for embedded Ustream videos
+ ustream_url = UstreamIE._extract_url(webpage)
+ if ustream_url:
+ return self.url_result(ustream_url, UstreamIE.ie_key())
+
+ # Look for embedded arte.tv player
+ arte_urls = ArteTVEmbedIE._extract_urls(webpage)
+ if arte_urls:
+ return self.playlist_from_matches(arte_urls, video_id, video_title)
+
+ # Look for embedded francetv player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for embedded Myvi.ru player
+ myvi_url = MyviIE._extract_url(webpage)
+ if myvi_url:
+ return self.url_result(myvi_url)
+
+ # Look for embedded soundcloud player
+ soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
+ if soundcloud_urls:
+ return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
+
+ # Look for tunein player
+ tunein_urls = TuneInBaseIE._extract_urls(webpage)
+ if tunein_urls:
+ return self.playlist_from_matches(tunein_urls, video_id, video_title)
+
+ # Look for embedded mtvservices player
+ mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
+ if mtvservices_url:
+ return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
+
+ # Look for embedded yahoo player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Yahoo')
+
+ # Look for embedded sbs.com.au player
+ mobj = re.search(
+ r'''(?x)
+ (?:
+ <meta\s+property="og:video"\s+content=|
+ <iframe[^>]+?src=
+ )
+ (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'SBS')
+
+ # Look for embedded Cinchcast player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Cinchcast')
+
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
+ webpage)
+ if not mobj:
+ mobj = re.search(
+ r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'MLB')
+
+ mobj = re.search(
+ r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
+ webpage)
+ if mobj is not None:
+ return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
+
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Livestream')
+
+ # Look for Zapiks embed
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Zapiks')
+
+ # Look for Kaltura embeds
+ kaltura_urls = KalturaIE._extract_urls(webpage)
+ if kaltura_urls:
+ return self.playlist_from_matches(
+ kaltura_urls, video_id, video_title,
+ getter=lambda x: smuggle_url(x, {'source_url': url}),
+ ie=KalturaIE.ie_key())
+
+ # Look for EaglePlatform embeds
+ eagleplatform_url = EaglePlatformIE._extract_url(webpage)
+ if eagleplatform_url:
+ return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
+
+ # Look for ClipYou (uses EaglePlatform) embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
+ if mobj is not None:
+ return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
+
+ # Look for Pladform embeds
+ pladform_url = PladformIE._extract_url(webpage)
+ if pladform_url:
+ return self.url_result(pladform_url)
+
+ # Look for Videomore embeds
+ videomore_url = VideomoreIE._extract_url(webpage)
+ if videomore_url:
+ return self.url_result(videomore_url)
+
+ # Look for Webcaster embeds
+ webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
+ if webcaster_url:
+ return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
+
+ # Look for Playwire embeds
+ mobj = re.search(
+ r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for 5min embeds
+ mobj = re.search(
+ r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
+ if mobj is not None:
+ return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
+
+ # Look for Crooks and Liars embeds
+ mobj = re.search(
+ r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for NBC Sports VPlayer embeds
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+
+ # Look for NBC News embeds
+ nbc_news_embed_url = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
+ if nbc_news_embed_url:
+ return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
+
+ # Look for Google Drive embeds
+ google_drive_url = GoogleDriveIE._extract_url(webpage)
+ if google_drive_url:
+ return self.url_result(google_drive_url, 'GoogleDrive')
+
+ # Look for UDN embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
+ if mobj is not None:
+ return self.url_result(
+ compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+
+ # Look for Senate ISVP iframe
+ senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
+ if senate_isvp_url:
+ return self.url_result(senate_isvp_url, 'SenateISVP')
+
+ # Look for Kinja embeds
+ kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url)
+ if kinja_embed_urls:
+ return self.playlist_from_matches(
+ kinja_embed_urls, video_id, video_title)
+
+ # Look for OnionStudios embeds
+ onionstudios_url = OnionStudiosIE._extract_url(webpage)
+ if onionstudios_url:
+ return self.url_result(onionstudios_url)
+
+ # Look for ViewLift embeds
+ viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
+ if viewlift_url:
+ return self.url_result(viewlift_url)
+
+ # Look for JWPlatform embeds
+ jwplatform_urls = JWPlatformIE._extract_urls(webpage)
+ if jwplatform_urls:
+ return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
+
+ # Look for Digiteka embeds
+ digiteka_url = DigitekaIE._extract_url(webpage)
+ if digiteka_url:
+ return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
+
+ # Look for Arkena embeds
+ arkena_url = ArkenaIE._extract_url(webpage)
+ if arkena_url:
+ return self.url_result(arkena_url, ArkenaIE.ie_key())
+
+ # Look for Piksel embeds
+ piksel_url = PikselIE._extract_url(webpage)
+ if piksel_url:
+ return self.url_result(piksel_url, PikselIE.ie_key())
+
+ # Look for Limelight embeds
+ limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
+ if limelight_urls:
+ return self.playlist_result(
+ limelight_urls, video_id, video_title, video_description)
+
+ # Look for Anvato embeds
+ anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
+ if anvato_urls:
+ return self.playlist_result(
+ anvato_urls, video_id, video_title, video_description)
+
+ # Look for AdobeTVVideo embeds
+ mobj = re.search(
+ r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
+ webpage)
+ if mobj is not None:
+ return self.url_result(
+ self._proto_relative_url(unescapeHTML(mobj.group(1))),
+ 'AdobeTVVideo')
+
+ # Look for Vine embeds
+ mobj = re.search(
+ r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
+ webpage)
+ if mobj is not None:
+ return self.url_result(
+ self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
+
+ # Look for VODPlatform embeds
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(
+ self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
+
+ # Look for Mangomolo embeds
+ mobj = re.search(
+ r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//
+ (?:
+ admin\.mangomolo\.com/analytics/index\.php/customers/embed|
+ player\.mangomolo\.com/v1
+ )/
+ (?:
+ video\?.*?\bid=(?P<video_id>\d+)|
+ (?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
+ ).+?)\1''', webpage)
+ if mobj is not None:
+ info = {
+ '_type': 'url_transparent',
+ 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'uploader': video_uploader,
+ }
+ video_id = mobj.group('video_id')
+ if video_id:
+ info.update({
+ 'ie_key': 'MangomoloVideo',
+ 'id': video_id,
+ })
+ else:
+ info.update({
+ 'ie_key': 'MangomoloLive',
+ 'id': mobj.group('channel_id'),
+ })
+ return info
+
+ # Look for Instagram embeds
+ instagram_embed_url = InstagramIE._extract_embed_url(webpage)
+ if instagram_embed_url is not None:
+ return self.url_result(
+ self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
+
+ # Look for LiveLeak embeds
+ liveleak_urls = LiveLeakIE._extract_urls(webpage)
+ if liveleak_urls:
+ return self.playlist_from_matches(liveleak_urls, video_id, video_title)
+
+ # Look for 3Q SDN embeds
+ threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
+ if threeqsdn_url:
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': ThreeQSDNIE.ie_key(),
+ 'url': self._proto_relative_url(threeqsdn_url),
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'uploader': video_uploader,
+ }
+
+ # Look for VBOX7 embeds
+ vbox7_url = Vbox7IE._extract_url(webpage)
+ if vbox7_url:
+ return self.url_result(vbox7_url, Vbox7IE.ie_key())
+
+ # Look for DBTV embeds
+ dbtv_urls = DBTVIE._extract_urls(webpage)
+ if dbtv_urls:
+ return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
+
+ # Look for Videa embeds
+ videa_urls = VideaIE._extract_urls(webpage)
+ if videa_urls:
+ return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
+
+ # Look for 20 minuten embeds
+ twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
+ if twentymin_urls:
+ return self.playlist_from_matches(
+ twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
+
+ # Look for VideoPress embeds
+ videopress_urls = VideoPressIE._extract_urls(webpage)
+ if videopress_urls:
+ return self.playlist_from_matches(
+ videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
+
+ # Look for Rutube embeds
+ rutube_urls = RutubeIE._extract_urls(webpage)
+ if rutube_urls:
+ return self.playlist_from_matches(
+ rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
+
+ # Look for WashingtonPost embeds
+ wapo_urls = WashingtonPostIE._extract_urls(webpage)
+ if wapo_urls:
+ return self.playlist_from_matches(
+ wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
+
+ # Look for Mediaset embeds
+ mediaset_urls = MediasetIE._extract_urls(self, webpage)
+ if mediaset_urls:
+ return self.playlist_from_matches(
+ mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
+
+ # Look for JOJ.sk embeds
+ joj_urls = JojIE._extract_urls(webpage)
+ if joj_urls:
+ return self.playlist_from_matches(
+ joj_urls, video_id, video_title, ie=JojIE.ie_key())
+
+ # Look for megaphone.fm embeds
+ mpfn_urls = MegaphoneIE._extract_urls(webpage)
+ if mpfn_urls:
+ return self.playlist_from_matches(
+ mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
+
+ # Look for vzaar embeds
+ vzaar_urls = VzaarIE._extract_urls(webpage)
+ if vzaar_urls:
+ return self.playlist_from_matches(
+ vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
+
+ channel9_urls = Channel9IE._extract_urls(webpage)
+ if channel9_urls:
+ return self.playlist_from_matches(
+ channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
+
+ vshare_urls = VShareIE._extract_urls(webpage)
+ if vshare_urls:
+ return self.playlist_from_matches(
+ vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
+
+ # Look for Mediasite embeds
+ mediasite_urls = MediasiteIE._extract_urls(webpage)
+ if mediasite_urls:
+ entries = [
+ self.url_result(smuggle_url(
+ compat_urlparse.urljoin(url, mediasite_url),
+ {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
+ for mediasite_url in mediasite_urls]
+ return self.playlist_result(entries, video_id, video_title)
+
+ springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
+ if springboardplatform_urls:
+ return self.playlist_from_matches(
+ springboardplatform_urls, video_id, video_title,
+ ie=SpringboardPlatformIE.ie_key())
+
+ yapfiles_urls = YapFilesIE._extract_urls(webpage)
+ if yapfiles_urls:
+ return self.playlist_from_matches(
+ yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
+
+ vice_urls = ViceIE._extract_urls(webpage)
+ if vice_urls:
+ return self.playlist_from_matches(
+ vice_urls, video_id, video_title, ie=ViceIE.ie_key())
+
+ xfileshare_urls = XFileShareIE._extract_urls(webpage)
+ if xfileshare_urls:
+ return self.playlist_from_matches(
+ xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
+
+ cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
+ if cloudflarestream_urls:
+ return self.playlist_from_matches(
+ cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
+
+ peertube_urls = PeerTubeIE._extract_urls(webpage, url)
+ if peertube_urls:
+ return self.playlist_from_matches(
+ peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
+
+ indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
+ if indavideo_urls:
+ return self.playlist_from_matches(
+ indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
+
+ apa_urls = APAIE._extract_urls(webpage)
+ if apa_urls:
+ return self.playlist_from_matches(
+ apa_urls, video_id, video_title, ie=APAIE.ie_key())
+
+ foxnews_urls = FoxNewsIE._extract_urls(webpage)
+ if foxnews_urls:
+ return self.playlist_from_matches(
+ foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
+
+ sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer(
+ r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
+ webpage)]
+ if sharevideos_urls:
+ return self.playlist_from_matches(
+ sharevideos_urls, video_id, video_title)
+
+ viqeo_urls = ViqeoIE._extract_urls(webpage)
+ if viqeo_urls:
+ return self.playlist_from_matches(
+ viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
+
+ expressen_urls = ExpressenIE._extract_urls(webpage)
+ if expressen_urls:
+ return self.playlist_from_matches(
+ expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
+
+ zype_urls = ZypeIE._extract_urls(webpage)
+ if zype_urls:
+ return self.playlist_from_matches(
+ zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
+
+ # Look for HTML5 media
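+ # A page can embed several HTML5 videos; number the entries so that their
+ # ids and titles stay unique within the playlist.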
+ entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
+ if entries:
+ if len(entries) == 1:
+ entries[0].update({
+ 'id': video_id,
+ 'title': video_title,
+ })
+ else:
+ for num, entry in enumerate(entries, start=1):
+ entry.update({
+ 'id': '%s-%s' % (video_id, num),
+ 'title': '%s (%d)' % (video_title, num),
+ })
+ for entry in entries:
+ self._sort_formats(entry['formats'])
+ return self.playlist_result(entries, video_id, video_title)
+
+ jwplayer_data = self._find_jwplayer_data(
+ webpage, video_id, transform_source=js_to_json)
+ if jwplayer_data:
+ try:
+ info = self._parse_jwplayer_data(
+ jwplayer_data, video_id, require_title=False, base_url=url)
+ return merge_dicts(info, info_dict)
+ except ExtractorError:
+ # See https://github.com/ytdl-org/youtube-dl/pull/16735
+ pass
+
+ # Video.js embed
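+ # videojs(...).src() accepts a single source object or a list of them;
+ # normalise to a list and map each source to formats by MIME type or
+ # file extension.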
+ mobj = re.search(
+ r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
+ webpage)
+ if mobj is not None:
+ sources = self._parse_json(
+ mobj.group(1), video_id, transform_source=js_to_json,
+ fatal=False) or []
+ if not isinstance(sources, list):
+ sources = [sources]
+ formats = []
+ for source in sources:
+ src = source.get('src')
+ if not src or not isinstance(src, compat_str):
+ continue
+ src = compat_urlparse.urljoin(url, src)
+ src_type = source.get('type')
+ if isinstance(src_type, compat_str):
+ src_type = src_type.lower()
+ ext = determine_ext(src).lower()
+ if src_type == 'video/youtube':
+ return self.url_result(src, YoutubeIE.ie_key())
+ if src_type == 'application/dash+xml' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id='dash', fatal=False))
+ elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ # Explicit parentheses: 'a or b if c else d' parses as '(a or b) if c else d'.
+ 'ext': ((mimetype2ext(src_type) or ext)
+ if ext in KNOWN_EXTENSIONS else 'mp4'),
+ 'http_headers': {
+ 'Referer': full_response.geturl(),
+ },
+ })
+ if formats:
+ self._sort_formats(formats)
+ info_dict['formats'] = formats
+ return info_dict
+
+ # Looking for http://schema.org/VideoObject
+ json_ld = self._search_json_ld(
+ webpage, video_id, default={}, expected_type='VideoObject')
+ if json_ld.get('url'):
+ return merge_dicts(json_ld, info_dict)
+
+ def check_video(vurl):
+ if YoutubeIE.suitable(vurl):
+ return True
+ if RtmpIE.suitable(vurl):
+ return True
+ vpath = compat_urlparse.urlparse(vurl).path
+ vext = determine_ext(vpath)
+ return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
+
+ def filter_video(urls):
+ return list(filter(check_video, urls))
+
+ # Start with something easy: JW Player in SWFObject
+ found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
+ if not found:
+ # Look for gorilla-vid style embedding
+ found = filter_video(re.findall(r'''(?sx)
+ (?:
+ jw_plugins|
+ JWPlayerOptions|
+ jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
+ )
+ .*?
+ ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
+ if not found:
+ # Broaden the search a little bit
+ found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
+ if not found:
+ # Broaden the findall a little bit: JWPlayer JS loader
+ found = filter_video(re.findall(
+ r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
+ if not found:
+ # Flow player
+ found = filter_video(re.findall(r'''(?xs)
+ flowplayer\("[^"]+",\s*
+ \{[^}]+?\}\s*,
+ \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
+ ["']?url["']?\s*:\s*["']([^"']+)["']
+ ''', webpage))
+ if not found:
+ # Cinerama player
+ found = re.findall(
+ r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
+ if not found:
+ # Try to find twitter cards info
+ # twitter:player:stream should be checked before twitter:player since
+ # it is expected to contain a raw stream (see
+ # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
+ found = filter_video(re.findall(
+ r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
+ if not found:
+ # We look for Open Graph info:
+ # We have to match any number of spaces between elements; some sites try to align them, e.g. statigr.am
+ m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
+ # Only look in og:video if the MIME type is a video; don't try if it's a Flash player
+ # (re.findall returns a list, so test truthiness, not None)
+ if m_video_type:
+ found = filter_video(re.findall(r'<meta.*?property="og:(?:video|audio)".*?content="(.*?)"', webpage))
+ if not found:
+ REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
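+ # e.g. captures http://example.com/next from a hypothetical
+ # <meta http-equiv="refresh" content="0; URL='http://example.com/next'">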
+ found = re.search(
+ r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
+ r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
+ webpage)
+ if not found:
+ # Look also in Refresh HTTP header
+ refresh_header = head_response.headers.get('Refresh')
+ if refresh_header:
+ # In Python 2, HTTP response headers are bytestrings
+ if sys.version_info < (3, 0) and isinstance(refresh_header, str):
+ refresh_header = refresh_header.decode('iso-8859-1')
+ found = re.search(REDIRECT_REGEX, refresh_header)
+ if found:
+ new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
+ if new_url != url:
+ self.report_following_redirect(new_url)
+ return {
+ '_type': 'url',
+ 'url': new_url,
+ }
+ else:
+ found = None
+
+ if not found:
+ # twitter:player is an https URL to an iframe player that may or may
+ # not be supported by hypervideo, so it is checked last (see
+ # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
+ embed_url = self._html_search_meta('twitter:player', webpage, default=None)
+ if embed_url and embed_url != url:
+ return self.url_result(embed_url)
+
+ if not found:
+ raise UnsupportedError(url)
+
+ entries = []
+ for video_url in orderedSet(found):
+ video_url = unescapeHTML(video_url)
+ video_url = video_url.replace('\\/', '/')
+ video_url = compat_urlparse.urljoin(url, video_url)
+ video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
+
+ # Sometimes, jwplayer extraction will result in a YouTube URL
+ if YoutubeIE.suitable(video_url):
+ entries.append(self.url_result(video_url, 'Youtube'))
+ continue
+
+ # Strip the file extension from the URL basename to get a cleaner id:
+ video_id = os.path.splitext(video_id)[0]
+
+ entry_info_dict = {
+ 'id': video_id,
+ 'uploader': video_uploader,
+ 'title': video_title,
+ 'age_limit': age_limit,
+ }
+
+ if RtmpIE.suitable(video_url):
+ entry_info_dict.update({
+ '_type': 'url_transparent',
+ 'ie_key': RtmpIE.ie_key(),
+ 'url': video_url,
+ })
+ entries.append(entry_info_dict)
+ continue
+
+ ext = determine_ext(video_url)
+ if ext == 'smil':
+ entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
+ elif ext == 'xspf':
+ return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
+ elif ext == 'm3u8':
+ entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
+ elif ext == 'mpd':
+ entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
+ elif ext == 'f4m':
+ entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
+ elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
+ # Just matching .ism/manifest is not enough to be reliably sure
+ # whether it's actually an ISM manifest or some other streaming
+ # manifest since there are various streaming URL formats
+ # possible (see [1]) as well as some other shenanigans like
+ # .smil/manifest URLs that actually serve an ISM (see [2]) and
+ # so on.
+ # Thus the most reasonable way to solve this is to delegate
+ # to the generic extractor so that it can inspect the contents
+ # of the manifest itself.
+ # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
+ # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
+ entry_info_dict = self.url_result(
+ smuggle_url(video_url, {'to_generic': True}),
+ GenericIE.ie_key())
+ else:
+ entry_info_dict['url'] = video_url
+
+ if entry_info_dict.get('formats'):
+ self._sort_formats(entry_info_dict['formats'])
+
+ entries.append(entry_info_dict)
+
+ if len(entries) == 1:
+ return entries[0]
+ else:
+ for num, e in enumerate(entries, start=1):
+ # 'url' results don't have a title
+ if e.get('title') is not None:
+ e['title'] = '%s (%d)' % (e['title'], num)
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ }
diff --git a/hypervideo_dl/extractor/gfycat.py b/hypervideo_dl/extractor/gfycat.py
new file mode 100644
index 0000000..18a30fe
--- /dev/null
+++ b/hypervideo_dl/extractor/gfycat.py
@@ -0,0 +1,125 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ float_or_none,
+ qualities,
+ ExtractorError,
+)
+
+
+class GfycatIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\.]+)'
+ _TESTS = [{
+ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
+ 'info_dict': {
+ 'id': 'DeadlyDecisiveGermanpinscher',
+ 'ext': 'mp4',
+ 'title': 'Ghost in the Shell',
+ 'timestamp': 1410656006,
+ 'upload_date': '20140914',
+ 'uploader': 'anonymous',
+ 'duration': 10.4,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'categories': list,
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
+ 'info_dict': {
+ 'id': 'JauntyTimelyAmazontreeboa',
+ 'ext': 'mp4',
+ 'title': 'JauntyTimelyAmazontreeboa',
+ 'timestamp': 1411720126,
+ 'upload_date': '20140926',
+ 'uploader': 'anonymous',
+ 'duration': 3.52,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'categories': list,
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish',
+ 'only_matching': True
+ }, {
+ 'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
+ 'only_matching': True
+ }, {
+ 'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
+ 'only_matching': True
+ }, {
+ 'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif',
+ 'only_matching': True
+ }, {
+ 'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ gfy = self._download_json(
+ 'https://api.gfycat.com/v1/gfycats/%s' % video_id,
+ video_id, 'Downloading video info')
+ if 'error' in gfy:
+ raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
+ gfy = gfy['gfyItem']
+
+ title = gfy.get('title') or gfy['gfyName']
+ description = gfy.get('description')
+ timestamp = int_or_none(gfy.get('createDate'))
+ uploader = gfy.get('userName')
+ view_count = int_or_none(gfy.get('views'))
+ like_count = int_or_none(gfy.get('likes'))
+ dislike_count = int_or_none(gfy.get('dislikes'))
+ age_limit = 18 if gfy.get('nsfw') == '1' else 0
+
+ width = int_or_none(gfy.get('width'))
+ height = int_or_none(gfy.get('height'))
+ fps = int_or_none(gfy.get('frameRate'))
+ num_frames = int_or_none(gfy.get('numFrames'))
+
+ duration = float_or_none(num_frames, fps) if num_frames and fps else None
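+ # float_or_none's second argument is a divisor, so this computes
+ # numFrames / frameRate seconds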
+
+ categories = gfy.get('tags') or gfy.get('extraLemmas') or []
+
+ FORMATS = ('gif', 'webm', 'mp4')
+ quality = qualities(FORMATS)
+
+ formats = []
+ for format_id in FORMATS:
+ video_url = gfy.get('%sUrl' % format_id)
+ if not video_url:
+ continue
+ filesize = int_or_none(gfy.get('%sSize' % format_id))
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'width': width,
+ 'height': height,
+ 'fps': fps,
+ 'filesize': filesize,
+ 'quality': quality(format_id),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'categories': categories,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/giantbomb.py b/hypervideo_dl/extractor/giantbomb.py
new file mode 100644
index 0000000..c647795
--- /dev/null
+++ b/hypervideo_dl/extractor/giantbomb.py
@@ -0,0 +1,90 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ qualities,
+ unescapeHTML,
+)
+
+
+class GiantBombIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?giantbomb\.com/(?:videos|shows)/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
+ _TESTS = [{
+ 'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
+ 'md5': '132f5a803e7e0ab0e274d84bda1e77ae',
+ 'info_dict': {
+ 'id': '2300-9782',
+ 'display_id': 'quick-look-destiny-the-dark-below',
+ 'ext': 'mp4',
+ 'title': 'Quick Look: Destiny: The Dark Below',
+ 'description': 'md5:0aa3aaf2772a41b91d44c63f30dfad24',
+ 'duration': 2399,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://www.giantbomb.com/shows/ben-stranding/2970-20212',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ video = json.loads(unescapeHTML(self._search_regex(
+ r'data-video="([^"]+)"', webpage, 'data-video')))
+
+ duration = int_or_none(video.get('lengthSeconds'))
+
+ quality = qualities([
+ 'f4m_low', 'progressive_low', 'f4m_high',
+ 'progressive_high', 'f4m_hd', 'progressive_hd'])
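+ # qualities() ranks format ids by list position, so progressive_hd
+ # is preferred over f4m_hd and so on down the list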
+
+ formats = []
+ for format_id, video_url in video['videoStreams'].items():
+ if format_id == 'f4m_stream':
+ continue
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.3.1', display_id)
+ if f4m_formats:
+ f4m_formats[0]['quality'] = quality(format_id)
+ formats.extend(f4m_formats)
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, display_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ })
+
+ if not formats:
+ youtube_id = video.get('youtubeID')
+ if youtube_id:
+ return self.url_result(youtube_id, 'Youtube')
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/giga.py b/hypervideo_dl/extractor/giga.py
new file mode 100644
index 0000000..5a9992a
--- /dev/null
+++ b/hypervideo_dl/extractor/giga.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+ qualities,
+ compat_str,
+ parse_duration,
+ parse_iso8601,
+ str_to_int,
+)
+
+
+class GigaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/',
+ 'md5': '6bc5535e945e724640664632055a584f',
+ 'info_dict': {
+ 'id': '2622086',
+ 'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss',
+ 'ext': 'mp4',
+ 'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss',
+ 'description': 'md5:afdf5862241aded4718a30dff6a57baf',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 578,
+ 'timestamp': 1414749706,
+ 'upload_date': '20141031',
+ 'uploader': 'Robin Schweiger',
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ [r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'],
+ webpage, 'video id')
+
+ playlist = self._download_json(
+ 'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/'
+ % video_id, video_id)[0]
+
+ quality = qualities(['normal', 'hd720'])
+
+ formats = []
+ for format_id in itertools.count(0):
+ fmt = playlist.get(compat_str(format_id))
+ if not fmt:
+ break
+ formats.append({
+ 'url': fmt['src'],
+ 'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]),
+ 'quality': quality(fmt['quality']),
+ })
+ self._sort_formats(formats)
+
+ title = self._html_search_meta(
+ 'title', webpage, 'title', fatal=True)
+ description = self._html_search_meta(
+ 'description', webpage, 'description')
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ duration = parse_duration(self._search_regex(
+ r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id),
+ webpage, 'duration', fatal=False))
+
+ timestamp = parse_iso8601(self._search_regex(
+ r'datetime="([^"]+)"', webpage, 'upload date', fatal=False))
+ uploader = self._search_regex(
+ r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False)
+
+ view_count = str_to_int(self._search_regex(
+ r'<span class="views"><strong>([\d.,]+)</strong>',
+ webpage, 'view count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/gigya.py b/hypervideo_dl/extractor/gigya.py
new file mode 100644
index 0000000..4121784
--- /dev/null
+++ b/hypervideo_dl/extractor/gigya.py
@@ -0,0 +1,22 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+ ExtractorError,
+ urlencode_postdata,
+)
+
+
+class GigyaBaseIE(InfoExtractor):
+ def _gigya_login(self, auth_data):
+ auth_info = self._download_json(
+ 'https://accounts.eu1.gigya.com/accounts.login', None,
+ note='Logging in', errnote='Unable to log in',
+ data=urlencode_postdata(auth_data))
+
+ error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
+ if error_message:
+ raise ExtractorError(
+ 'Unable to log in: %s' % error_message, expected=True)
+ return auth_info
diff --git a/hypervideo_dl/extractor/glide.py b/hypervideo_dl/extractor/glide.py
new file mode 100644
index 0000000..d94dfbf
--- /dev/null
+++ b/hypervideo_dl/extractor/glide.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class GlideIE(InfoExtractor):
+ IE_DESC = 'Glide mobile video messages (glide.me)'
+ _VALID_URL = r'https?://share\.glide\.me/(?P<id>[A-Za-z0-9\-=_+]+)'
+ _TEST = {
+ 'url': 'http://share.glide.me/UZF8zlmuQbe4mr+7dCiQ0w==',
+ 'md5': '4466372687352851af2d131cfaa8a4c7',
+ 'info_dict': {
+ 'id': 'UZF8zlmuQbe4mr+7dCiQ0w==',
+ 'ext': 'mp4',
+ 'title': "Damon's Glide message",
+ 'thumbnail': r're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<title>(.+?)</title>', webpage,
+ 'title', default=None) or self._og_search_title(webpage)
+ video_url = self._proto_relative_url(self._search_regex(
+ r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
+ webpage, 'video URL', default=None,
+ group='url')) or self._og_search_video_url(webpage)
+ thumbnail = self._proto_relative_url(self._search_regex(
+ r'<img[^>]+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P<url>.+?)\1',
+ webpage, 'thumbnail url', default=None,
+ group='url')) or self._og_search_thumbnail(webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/globo.py b/hypervideo_dl/extractor/globo.py
new file mode 100644
index 0000000..60d842d
--- /dev/null
+++ b/hypervideo_dl/extractor/globo.py
@@ -0,0 +1,240 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import hashlib
+import json
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ orderedSet,
+ str_or_none,
+)
+
+
+class GloboIE(InfoExtractor):
+ _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
+ _NETRC_MACHINE = 'globo'
+ _TESTS = [{
+ 'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
+ 'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
+ 'info_dict': {
+ 'id': '3607726',
+ 'ext': 'mp4',
+ 'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
+ 'duration': 103.204,
+ 'uploader': 'Globo.com',
+ 'uploader_id': '265',
+ },
+ }, {
+ 'url': 'http://globoplay.globo.com/v/4581987/',
+ 'md5': 'f36a1ecd6a50da1577eee6dd17f67eff',
+ 'info_dict': {
+ 'id': '4581987',
+ 'ext': 'mp4',
+ 'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP',
+ 'duration': 137.973,
+ 'uploader': 'Rede Globo',
+ 'uploader_id': '196',
+ },
+ }, {
+ 'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://globosatplay.globo.com/globonews/v/4472924/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'globo:3607726',
+ 'only_matching': True,
+ }]
+
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
+ try:
+ glb_id = (self._download_json(
+ 'https://login.globo.com/api/authentication', None, data=json.dumps({
+ 'payload': {
+ 'email': email,
+ 'password': password,
+ 'serviceId': 4654,
+ },
+ }).encode(), headers={
+ 'Content-Type': 'application/json; charset=utf-8',
+ }) or {}).get('glbId')
+ if glb_id:
+ self._set_cookie('.globo.com', 'GLBID', glb_id)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ resp = self._parse_json(e.cause.read(), None)
+ raise ExtractorError(resp.get('userMessage') or resp['id'], expected=True)
+ raise
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://api.globovideos.com/videos/%s/playlist' % video_id,
+ video_id)['videos'][0]
+ if video.get('encrypted') is True:
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
+ title = video['title']
+
+ formats = []
+ subtitles = {}
+ for resource in video['resources']:
+ resource_id = resource.get('_id')
+ resource_url = resource.get('url')
+ resource_type = resource.get('type')
+ if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'):
+ continue
+
+ if resource_type == 'subtitle':
+ subtitles.setdefault(resource.get('language') or 'por', []).append({
+ 'url': resource_url,
+ })
+ continue
+
+ security = self._download_json(
+ 'http://security.video.globo.com/videos/%s/hash' % video_id,
+ video_id, 'Downloading security hash for %s' % resource_id, query={
+ 'player': 'desktop',
+ 'version': '5.19.1',
+ 'resource_id': resource_id,
+ })
+
+ security_hash = security.get('hash')
+ if not security_hash:
+ message = security.get('message')
+ if message:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, message), expected=True)
+ continue
+
+ hash_code = security_hash[:2]
+ padding = '%010d' % random.randint(1, 10000000000)
+ if hash_code in ('04', '14'):
+ received_time = security_hash[3:13]
+ received_md5 = security_hash[24:]
+ hash_prefix = security_hash[:23]
+ elif hash_code in ('02', '12', '03', '13'):
+ received_time = security_hash[2:12]
+ received_md5 = security_hash[22:]
+ padding += '1'
+ hash_prefix = '05' + security_hash[:22]
+
+ padded_sign_time = compat_str(int(received_time) + 86400) + padding
+ md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
+ signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
+ signed_hash = hash_prefix + padded_sign_time + signed_md5
+ signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '')
+
+ if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
+ formats.extend(self._extract_m3u8_formats(
+ signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif resource_id.endswith('mpd') or resource_url.endswith('.mpd'):
+ formats.extend(self._extract_mpd_formats(
+ signed_url, resource_id, mpd_id='dash', fatal=False))
+ elif resource_id.endswith('manifest') or resource_url.endswith('/manifest'):
+ formats.extend(self._extract_ism_formats(
+ signed_url, resource_id, ism_id='mss', fatal=False))
+ else:
+ formats.append({
+ 'url': signed_url,
+ 'format_id': 'http-%s' % resource_id,
+ 'height': int_or_none(resource.get('height')),
+ })
+
+ self._sort_formats(formats)
+
+ duration = float_or_none(video.get('duration'), 1000)
+ uploader = video.get('channel')
+ uploader_id = str_or_none(video.get('channel_id'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class GloboArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?'
+
+ _VIDEOID_REGEXES = [
+ r'\bdata-video-id=["\'](\d{7,})',
+ r'\bdata-player-videosids=["\'](\d{7,})',
+ r'\bvideosIDs\s*:\s*["\']?(\d{7,})',
+ r'\bdata-id=["\'](\d{7,})',
+ r'<div[^>]+\bid=["\'](\d{7,})',
+ ]
+
+ _TESTS = [{
+ 'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
+ 'info_dict': {
+ 'id': 'novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes',
+ 'title': 'Novidade na fiscalização de bagagem pela Receita provoca discussões',
+ 'description': 'md5:c3c4b4d4c30c32fce460040b1ac46b12',
+ },
+ 'playlist_count': 1,
+ }, {
+ 'url': 'http://g1.globo.com/pr/parana/noticia/2016/09/mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato.html',
+ 'info_dict': {
+ 'id': 'mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato',
+ 'title': "Lula era o 'comandante máximo' do esquema da Lava Jato, diz MPF",
+ 'description': 'md5:8aa7cc8beda4dc71cc8553e00b77c54c',
+ },
+ 'playlist_count': 6,
+ }, {
+ 'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://oglobo.globo.com/rio/a-amizade-entre-um-entregador-de-farmacia-um-piano-19946271',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if GloboIE.suitable(url) else super(GloboArticleIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_ids = []
+ for video_regex in self._VIDEOID_REGEXES:
+ video_ids.extend(re.findall(video_regex, webpage))
+ entries = [
+ self.url_result('globo:%s' % video_id, GloboIE.ie_key())
+ for video_id in orderedSet(video_ids)]
+ title = self._og_search_title(webpage, fatal=False)
+ description = self._html_search_meta('description', webpage)
+ return self.playlist_result(entries, display_id, title, description)
diff --git a/hypervideo_dl/extractor/go.py b/hypervideo_dl/extractor/go.py
new file mode 100644
index 0000000..878ba14
--- /dev/null
+++ b/hypervideo_dl/extractor/go.py
@@ -0,0 +1,315 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .adobepass import AdobePassIE
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ determine_ext,
+ parse_age_limit,
+ try_get,
+ urlencode_postdata,
+ ExtractorError,
+)
+
+
+class GoIE(AdobePassIE):
+ _SITE_INFO = {
+ 'abc': {
+ 'brand': '001',
+ 'requestor_id': 'ABC',
+ },
+ 'freeform': {
+ 'brand': '002',
+ 'requestor_id': 'ABCFamily',
+ },
+ 'watchdisneychannel': {
+ 'brand': '004',
+ 'resource_id': 'Disney',
+ },
+ 'watchdisneyjunior': {
+ 'brand': '008',
+ 'resource_id': 'DisneyJunior',
+ },
+ 'watchdisneyxd': {
+ 'brand': '009',
+ 'resource_id': 'DisneyXD',
+ },
+ 'disneynow': {
+ 'brand': '011',
+ 'resource_id': 'Disney',
+ },
+ 'fxnow.fxnetworks': {
+ 'brand': '025',
+ 'requestor_id': 'dtci',
+ },
+ }
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?P<sub_domain>%s)\.)?go|
+ (?P<sub_domain_2>abc|freeform|disneynow|fxnow\.fxnetworks)
+ )\.com/
+ (?:
+ (?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
+ (?:[^/]+/)*(?P<display_id>[^/?\#]+)
+ )
+ ''' % '|'.join(list(_SITE_INFO.keys()))
+ _TESTS = [{
+ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
+ 'info_dict': {
+ 'id': 'VDKA3807643',
+ 'ext': 'mp4',
+ 'title': 'The Traitor in the White House',
+ 'description': 'md5:05b009d2d145a1e85d25111bd37222e8',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'This content is no longer available.',
+ }, {
+ 'url': 'http://watchdisneyxd.go.com/doraemon',
+ 'info_dict': {
+ 'title': 'Doraemon',
+ 'id': 'SH55574025',
+ },
+ 'playlist_mincount': 51,
+ }, {
+ 'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood',
+ 'info_dict': {
+ 'id': 'VDKA3609139',
+ 'ext': 'mp4',
+ 'title': 'This Guilty Blood',
+ 'description': 'md5:f18e79ad1c613798d95fdabfe96cd292',
+ 'age_limit': 14,
+ },
+ 'params': {
+ 'geo_bypass_ip_block': '3.244.239.0/24',
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet',
+ 'info_dict': {
+ 'id': 'VDKA13435179',
+ 'ext': 'mp4',
+ 'title': 'The Bet',
+ 'description': 'md5:c66de8ba2e92c6c5c113c3ade84ab404',
+ 'age_limit': 14,
+ },
+ 'params': {
+ 'geo_bypass_ip_block': '3.244.239.0/24',
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
+ 'info_dict': {
+ 'id': 'VDKA12782841',
+ 'ext': 'mp4',
+ 'title': 'First Look: Better Things - Season 2',
+ 'description': 'md5:fa73584a95761c605d9d54904e35b407',
+ },
+ 'params': {
+ 'geo_bypass_ip_block': '3.244.239.0/24',
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot',
+ 'info_dict': {
+ 'id': 'VDKA22600213',
+ 'ext': 'mp4',
+ 'title': 'Pilot',
+ 'description': 'md5:74306df917cfc199d76d061d66bebdb4',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
+ 'only_matching': True,
+ }, {
+ # brand 004
+ 'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
+ 'only_matching': True,
+ }, {
+ # brand 008
+ 'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
+ 'only_matching': True,
+ }]
+
+ def _extract_videos(self, brand, video_id='-1', show_id='-1'):
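+ # '-1' appears to act as a wildcard in the watchabc contents API,
+ # so the same endpoint can return a single video or a whole show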
+ display_id = video_id if video_id != '-1' else show_id
+ return self._download_json(
+ 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id),
+ display_id)['video']
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ sub_domain = mobj.group('sub_domain') or mobj.group('sub_domain_2')
+ video_id, display_id = mobj.group('id', 'display_id')
+ site_info = self._SITE_INFO.get(sub_domain, {})
+ brand = site_info.get('brand')
+ if not video_id or not site_info:
+ webpage = self._download_webpage(url, display_id or video_id)
+ data = self._parse_json(
+ self._search_regex(
+ r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage,
+ 'data', default='{}'),
+ display_id or video_id, fatal=False)
+ # https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot
+ layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict)
+ video_id = None
+ if layout:
+ video_id = try_get(
+ layout,
+ (lambda x: x['videoid'], lambda x: x['video']['id']),
+ compat_str)
+ if not video_id:
+ video_id = self._search_regex(
+ (
+ # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
+ # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
+ r'data-video-id=["\']*(VDKA\w+)',
+ # page.analytics.videoIdCode
+ r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)',
+ # https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
+ r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
+ ), webpage, 'video id', default=video_id)
+ if not site_info:
+ brand = self._search_regex(
+ (r'data-brand=\s*["\']\s*(\d+)',
+ r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
+ default='004')
+ site_info = next(
+ si for _, si in self._SITE_INFO.items()
+ if si.get('brand') == brand)
+ if not video_id:
+ # Show extraction works for Disney, DisneyJunior and DisneyXD;
+ # ABC and Freeform have a different layout
+ show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id')
+ videos = self._extract_videos(brand, show_id=show_id)
+ show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False)
+ entries = []
+ for video in videos:
+ entries.append(self.url_result(
+ video['url'], 'Go', video.get('id'), video.get('title')))
+ entries.reverse()
+ return self.playlist_result(entries, show_id, show_title)
+ video_data = self._extract_videos(brand, video_id)[0]
+ video_id = video_data['id']
+ title = video_data['title']
+
+ formats = []
+ for asset in video_data.get('assets', {}).get('asset', []):
+ asset_url = asset.get('value')
+ if not asset_url:
+ continue
+ format_id = asset.get('format')
+ ext = determine_ext(asset_url)
+ if ext == 'm3u8':
+ video_type = video_data.get('type')
+ data = {
+ 'video_id': video_data['id'],
+ 'video_type': video_type,
+ 'brand': brand,
+ 'device': '001',
+ }
+ if video_data.get('accesslevel') == '1':
+ requestor_id = site_info.get('requestor_id', 'DisneyChannels')
+ resource = site_info.get('resource_id') or self._get_mvpd_resource(
+ requestor_id, title, video_id, None)
+ auth = self._extract_mvpd_auth(
+ url, video_id, requestor_id, resource)
+ data.update({
+ 'token': auth,
+ 'token_type': 'ap',
+ 'adobe_requestor_id': requestor_id,
+ })
+ else:
+ self._initialize_geo_bypass({'countries': ['US']})
+ entitlement = self._download_json(
+ 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
+ video_id, data=urlencode_postdata(data))
+ errors = entitlement.get('errors', {}).get('errors', [])
+ if errors:
+ for error in errors:
+ if error.get('code') == 1002:
+ self.raise_geo_restricted(
+ error['message'], countries=['US'])
+ error_message = ', '.join([error['message'] for error in errors])
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
+ asset_url += '?' + entitlement['uplynkData']['sessionKey']
+ formats.extend(self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
+ else:
+ f = {
+ 'format_id': format_id,
+ 'url': asset_url,
+ 'ext': ext,
+ }
+ if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url):
+ f.update({
+ 'format_id': ('%s-' % format_id if format_id else '') + 'SOURCE',
+ 'preference': 1,
+ })
+ else:
+ mobj = re.search(r'/(\d+)x(\d+)/', asset_url)
+ if mobj:
+ height = int(mobj.group(2))
+ f.update({
+ 'format_id': ('%s-' % format_id if format_id else '') + '%dP' % height,
+ 'width': int(mobj.group(1)),
+ 'height': height,
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for cc in video_data.get('closedcaption', {}).get('src', []):
+ cc_url = cc.get('value')
+ if not cc_url:
+ continue
+ ext = determine_ext(cc_url)
+ if ext == 'xml':
+ ext = 'ttml'
+ subtitles.setdefault(cc.get('lang'), []).append({
+ 'url': cc_url,
+ 'ext': ext,
+ })
+
+ thumbnails = []
+ for thumbnail in video_data.get('thumbnails', {}).get('thumbnail', []):
+ thumbnail_url = thumbnail.get('value')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('longdescription') or video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration', {}).get('value'), 1000),
+ 'age_limit': parse_age_limit(video_data.get('tvrating', {}).get('rating')),
+ 'episode_number': int_or_none(video_data.get('episodenumber')),
+ 'series': video_data.get('show', {}).get('title'),
+ 'season_number': int_or_none(video_data.get('season', {}).get('num')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/godtube.py b/hypervideo_dl/extractor/godtube.py
new file mode 100644
index 0000000..92efd16
--- /dev/null
+++ b/hypervideo_dl/extractor/godtube.py
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class GodTubeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)'
+ _TESTS = [
+ {
+ 'url': 'https://www.godtube.com/watch/?v=0C0CNNNU',
+ 'md5': '77108c1e4ab58f48031101a1a2119789',
+ 'info_dict': {
+ 'id': '0C0CNNNU',
+ 'ext': 'mp4',
+ 'title': 'Woman at the well.',
+ 'duration': 159,
+ 'timestamp': 1205712000,
+ 'uploader': 'beverlybmusic',
+ 'upload_date': '20080317',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ config = self._download_xml(
+ 'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(),
+ video_id, 'Downloading player config XML')
+
+ video_url = config.find('file').text
+ uploader = config.find('author').text
+ timestamp = parse_iso8601(config.find('date').text)
+ duration = parse_duration(config.find('duration').text)
+ thumbnail = config.find('image').text
+
+ media = self._download_xml(
+ 'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML')
+
+ title = media.find('title').text
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'duration': duration,
+ }
diff --git a/hypervideo_dl/extractor/golem.py b/hypervideo_dl/extractor/golem.py
new file mode 100644
index 0000000..47a068e
--- /dev/null
+++ b/hypervideo_dl/extractor/golem.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ determine_ext,
+)
+
+
+class GolemIE(InfoExtractor):
+ _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
+ _TEST = {
+ 'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
+ 'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
+ 'info_dict': {
+ 'id': '14095',
+ 'format_id': 'high',
+ 'ext': 'mp4',
+ 'title': 'iPhone 6 und 6 Plus - Test',
+ 'duration': 300.44,
+ 'filesize': 65309548,
+ }
+ }
+
+ _PREFIX = 'http://video.golem.de'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ config = self._download_xml(
+ 'https://video.golem.de/xml/{0}.xml'.format(video_id), video_id)
+
+ info = {
+ 'id': video_id,
+ 'title': config.findtext('./title', 'golem'),
+ 'duration': self._float(config.findtext('./playtime'), 'duration'),
+ }
+
+ formats = []
+ for e in config:
+ url = e.findtext('./url')
+ if not url:
+ continue
+
+ formats.append({
+ 'format_id': compat_str(e.tag),
+ 'url': compat_urlparse.urljoin(self._PREFIX, url),
+ 'height': self._int(e.get('height'), 'height'),
+ 'width': self._int(e.get('width'), 'width'),
+ 'filesize': self._int(e.findtext('filesize'), 'filesize'),
+ 'ext': determine_ext(e.findtext('./filename')),
+ })
+ self._sort_formats(formats)
+ info['formats'] = formats
+
+ thumbnails = []
+ for e in config.findall('.//teaser'):
+ url = e.findtext('./url')
+ if not url:
+ continue
+ thumbnails.append({
+ 'url': compat_urlparse.urljoin(self._PREFIX, url),
+ 'width': self._int(e.get('width'), 'thumbnail width'),
+ 'height': self._int(e.get('height'), 'thumbnail height'),
+ })
+ info['thumbnails'] = thumbnails
+
+ return info
diff --git a/hypervideo_dl/extractor/googledrive.py b/hypervideo_dl/extractor/googledrive.py
new file mode 100644
index 0000000..3f2de00
--- /dev/null
+++ b/hypervideo_dl/extractor/googledrive.py
@@ -0,0 +1,278 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ get_element_by_class,
+ int_or_none,
+ lowercase_escape,
+ try_get,
+ update_url_query,
+)
+
+
+class GoogleDriveIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:docs|drive)\.google\.com/
+ (?:
+ (?:uc|open)\?.*?id=|
+ file/d/
+ )|
+ video\.google\.com/get_player\?.*?docid=
+ )
+ (?P<id>[a-zA-Z0-9_-]{28,})
+ '''
+ _TESTS = [{
+ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
+ 'md5': '5c602afbbf2c1db91831f5d82f678554',
+ 'info_dict': {
+ 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny.mp4',
+ 'duration': 45,
+ }
+ }, {
+ # video can't be watched anonymously due to view count limit reached,
+ # but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
+ 'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
+ 'only_matching': True,
+ }, {
+ # video id is longer than 28 characters
+ 'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
+ 'only_matching': True,
+ }]
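+ # The numeric format ids appear to mirror YouTube's itag scheme,
+ # which Drive reuses in fmt_list/fmt_stream_map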
+ _FORMATS_EXT = {
+ '5': 'flv',
+ '6': 'flv',
+ '13': '3gp',
+ '17': '3gp',
+ '18': 'mp4',
+ '22': 'mp4',
+ '34': 'flv',
+ '35': 'flv',
+ '36': '3gp',
+ '37': 'mp4',
+ '38': 'mp4',
+ '43': 'webm',
+ '44': 'webm',
+ '45': 'webm',
+ '46': 'webm',
+ '59': 'mp4',
+ }
+ _BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
+ _CAPTIONS_ENTRY_TAG = {
+ 'subtitles': 'track',
+ 'automatic_captions': 'target',
+ }
+ _caption_formats_ext = []
+ _captions_xml = None
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28,})',
+ webpage)
+ if mobj:
+ return 'https://drive.google.com/file/d/%s' % mobj.group('id')
+
+ def _download_subtitles_xml(self, video_id, subtitles_id, hl):
+ if self._captions_xml:
+ return
+ self._captions_xml = self._download_xml(
+ self._BASE_URL_CAPTIONS, video_id, query={
+ 'id': video_id,
+ 'vid': subtitles_id,
+ 'hl': hl,
+ 'v': video_id,
+ 'type': 'list',
+ 'tlangs': '1',
+ 'fmts': '1',
+ 'vssids': '1',
+ }, note='Downloading subtitles XML',
+ errnote='Unable to download subtitles XML', fatal=False)
+ if self._captions_xml:
+ for f in self._captions_xml.findall('format'):
+ if f.attrib.get('fmt_code') and not f.attrib.get('default'):
+ self._caption_formats_ext.append(f.attrib['fmt_code'])
+
+ def _get_captions_by_type(self, video_id, subtitles_id, caption_type,
+ origin_lang_code=None):
+ if not subtitles_id or not caption_type:
+ return
+ captions = {}
+ for caption_entry in self._captions_xml.findall(
+ self._CAPTIONS_ENTRY_TAG[caption_type]):
+ caption_lang_code = caption_entry.attrib.get('lang_code')
+ if not caption_lang_code:
+ continue
+ caption_format_data = []
+ for caption_format in self._caption_formats_ext:
+ query = {
+ 'vid': subtitles_id,
+ 'v': video_id,
+ 'fmt': caption_format,
+ 'lang': (caption_lang_code if origin_lang_code is None
+ else origin_lang_code),
+ 'type': 'track',
+ 'name': '',
+ 'kind': '',
+ }
+ if origin_lang_code is not None:
+ query.update({'tlang': caption_lang_code})
+ caption_format_data.append({
+ 'url': update_url_query(self._BASE_URL_CAPTIONS, query),
+ 'ext': caption_format,
+ })
+ captions[caption_lang_code] = caption_format_data
+ return captions
+
+ def _get_subtitles(self, video_id, subtitles_id, hl):
+ if not subtitles_id or not hl:
+ return
+ self._download_subtitles_xml(video_id, subtitles_id, hl)
+ if not self._captions_xml:
+ return
+ return self._get_captions_by_type(video_id, subtitles_id, 'subtitles')
+
+ def _get_automatic_captions(self, video_id, subtitles_id, hl):
+ if not subtitles_id or not hl:
+ return
+ self._download_subtitles_xml(video_id, subtitles_id, hl)
+ if not self._captions_xml:
+ return
+ track = self._captions_xml.find('track')
+ if track is None:
+ return
+ origin_lang_code = track.attrib.get('lang_code')
+ if not origin_lang_code:
+ return
+ return self._get_captions_by_type(
+ video_id, subtitles_id, 'automatic_captions', origin_lang_code)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_info = compat_parse_qs(self._download_webpage(
+ 'https://drive.google.com/get_video_info',
+ video_id, query={'docid': video_id}))
+
+ def get_value(key):
+ return try_get(video_info, lambda x: x[key][0])
+
+ reason = get_value('reason')
+ title = get_value('title')
+ if not title and reason:
+ raise ExtractorError(reason, expected=True)
+
+ formats = []
+ fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
+ fmt_list = (get_value('fmt_list') or '').split(',')
+ if fmt_stream_map and fmt_list:
+ resolutions = {}
+ for fmt in fmt_list:
+ mobj = re.search(
+ r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
+ if mobj:
+ resolutions[mobj.group('format_id')] = (
+ int(mobj.group('width')), int(mobj.group('height')))
+
+ for fmt_stream in fmt_stream_map:
+ fmt_stream_split = fmt_stream.split('|')
+ if len(fmt_stream_split) < 2:
+ continue
+ format_id, format_url = fmt_stream_split[:2]
+ f = {
+ 'url': lowercase_escape(format_url),
+ 'format_id': format_id,
+ 'ext': self._FORMATS_EXT[format_id],
+ }
+ resolution = resolutions.get(format_id)
+ if resolution:
+ f.update({
+ 'width': resolution[0],
+ 'height': resolution[1],
+ })
+ formats.append(f)
+
+ source_url = update_url_query(
+ 'https://drive.google.com/uc', {
+ 'id': video_id,
+ 'export': 'download',
+ })
+
+ def request_source_file(source_url, kind):
+ return self._request_webpage(
+ source_url, video_id, note='Requesting %s file' % kind,
+ errnote='Unable to request %s file' % kind, fatal=False)
+ urlh = request_source_file(source_url, 'source')
+ if urlh:
+ def add_source_format(urlh):
+ formats.append({
+ # Use redirect URLs as download URLs in order to calculate
+ # correct cookies in _calc_cookies.
+ # Using the original URLs may result in a redirect loop because
+ # google.com's cookies are mistakenly used for googleusercontent.com
+ # redirect URLs (see #23919).
+ 'url': urlh.geturl(),
+ 'ext': determine_ext(title, 'mp4').lower(),
+ 'format_id': 'source',
+ 'quality': 1,
+ })
+ if urlh.headers.get('Content-Disposition'):
+ add_source_format(urlh)
+ else:
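+ # No Content-Disposition header means Drive returned an
+ # interstitial page (typically the can't-scan-for-viruses warning
+ # for large files); extract its confirm token and retry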
+ confirmation_webpage = self._webpage_read_content(
+ urlh, url, video_id, note='Downloading confirmation page',
+ errnote='Unable to confirm download', fatal=False)
+ if confirmation_webpage:
+ confirm = self._search_regex(
+ r'confirm=([^&"\']+)', confirmation_webpage,
+ 'confirmation code', default=None)
+ if confirm:
+ confirmed_source_url = update_url_query(source_url, {
+ 'confirm': confirm,
+ })
+ urlh = request_source_file(confirmed_source_url, 'confirmed source')
+ if urlh and urlh.headers.get('Content-Disposition'):
+ add_source_format(urlh)
+ else:
+ self.report_warning(
+ get_element_by_class('uc-error-subcaption', confirmation_webpage)
+ or get_element_by_class('uc-error-caption', confirmation_webpage)
+ or 'unable to extract confirmation code')
+
+ if not formats and reason:
+ raise ExtractorError(reason, expected=True)
+
+ self._sort_formats(formats)
+
+ hl = get_value('hl')
+ subtitles_id = None
+ ttsurl = get_value('ttsurl')
+ if ttsurl:
+ # The video ID for subtitles is the last value in the ttsurl
+ # query string
+ subtitles_id = ttsurl.encode('utf-8').decode(
+ 'unicode_escape').split('=')[-1]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
+ 'duration': int_or_none(get_value('length_seconds')),
+ 'formats': formats,
+ 'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
+ 'automatic_captions': self.extract_automatic_captions(
+ video_id, subtitles_id, hl),
+ }
diff --git a/hypervideo_dl/extractor/googlepodcasts.py b/hypervideo_dl/extractor/googlepodcasts.py
new file mode 100644
index 0000000..31ad799
--- /dev/null
+++ b/hypervideo_dl/extractor/googlepodcasts.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_podcast_url,
+ int_or_none,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class GooglePodcastsBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/'
+
+ def _batch_execute(self, func_id, video_id, params):
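+ # batchexecute responses wrap the payload in an anti-XSSI envelope;
+ # the transform_source regex strips everything around the outer JSON
+ # array, and the real payload is the JSON-encoded string at [0][2]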
+ return json.loads(self._download_json(
+ 'https://podcasts.google.com/_/PodcastsUi/data/batchexecute',
+ video_id, data=urlencode_postdata({
+ 'f.req': json.dumps([[[func_id, json.dumps(params), None, '1']]]),
+ }), transform_source=lambda x: self._search_regex(r'(?s)(\[.+\])', x, 'data'))[0][2])
+
+ def _extract_episode(self, episode):
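+ # episode is a positional array (protobuf-over-JSON style); the
+ # numeric indices below are reverse-engineered and may shift if
+ # Google changes the schema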
+ return {
+ 'id': episode[4][3],
+ 'title': episode[8],
+ 'url': clean_podcast_url(episode[13]),
+ 'thumbnail': episode[2],
+ 'description': episode[9],
+ 'creator': try_get(episode, lambda x: x[14]),
+ 'timestamp': int_or_none(episode[11]),
+ 'duration': int_or_none(episode[12]),
+ 'series': episode[1],
+ }
+
+
+class GooglePodcastsIE(GooglePodcastsBaseIE):
+ IE_NAME = 'google:podcasts'
+ _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<feed_url>[^/]+)/episode/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh',
+ 'md5': 'fa56b2ee8bd0703e27e42d4b104c4766',
+ 'info_dict': {
+ 'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a',
+ 'ext': 'mp3',
+ 'title': 'WWDTM New Year 2021',
+ 'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.',
+ 'upload_date': '20210102',
+ 'timestamp': 1609606800,
+ 'duration': 2901,
+ 'series': "Wait Wait... Don't Tell Me!",
+ }
+ }
+
+ def _real_extract(self, url):
+ b64_feed_url, b64_guid = re.match(self._VALID_URL, url).groups()
+ episode = self._batch_execute(
+ 'oNjqVe', b64_guid, [b64_feed_url, b64_guid])[1]
+ return self._extract_episode(episode)
+
+
+class GooglePodcastsFeedIE(GooglePodcastsBaseIE):
+ IE_NAME = 'google:podcasts:feed'
+ _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<id>[^/?&#]+)/?(?:[?#&]|$)'
+ _TEST = {
+ 'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA',
+ 'info_dict': {
+ 'title': "Wait Wait... Don't Tell Me!",
+ 'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.",
+ },
+ 'playlist_mincount': 20,
+ }
+
+ def _real_extract(self, url):
+ b64_feed_url = self._match_id(url)
+ data = self._batch_execute('ncqJEe', b64_feed_url, [b64_feed_url])
+
+ entries = []
+ for episode in (try_get(data, lambda x: x[1][0]) or []):
+ entries.append(self._extract_episode(episode))
+
+ feed = try_get(data, lambda x: x[3]) or []
+ return self.playlist_result(
+ entries, playlist_title=try_get(feed, lambda x: x[0]),
+ playlist_description=try_get(feed, lambda x: x[2]))
diff --git a/hypervideo_dl/extractor/googlesearch.py b/hypervideo_dl/extractor/googlesearch.py
new file mode 100644
index 0000000..5279fa8
--- /dev/null
+++ b/hypervideo_dl/extractor/googlesearch.py
@@ -0,0 +1,59 @@
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import SearchInfoExtractor
+
+
+class GoogleSearchIE(SearchInfoExtractor):
+ IE_DESC = 'Google Video search'
+ _MAX_RESULTS = 1000
+ IE_NAME = 'video.google:search'
+ _SEARCH_KEY = 'gvsearch'
+ _TEST = {
+ 'url': 'gvsearch15:python language',
+ 'info_dict': {
+ 'id': 'python language',
+ 'title': 'python language',
+ },
+ 'playlist_count': 15,
+ }
+
+ def _get_n_results(self, query, n):
+ """Get a specified number of results for a query"""
+
+ entries = []
+ res = {
+ '_type': 'playlist',
+ 'id': query,
+ 'title': query,
+ }
+
+ for pagenum in itertools.count():
+ webpage = self._download_webpage(
+ 'http://www.google.com/search',
+ 'gvsearch:' + query,
+ note='Downloading result page %s' % (pagenum + 1),
+ query={
+ 'tbm': 'vid',
+ 'q': query,
+ 'start': pagenum * 10,
+ 'hl': 'en',
+ })
+
+ for hit_idx, mobj in enumerate(re.finditer(
+ r'<h3 class="r"><a href="([^"]+)"', webpage)):
+
+ # Skip playlists
+ if not re.search(r'id="vidthumb%d"' % (hit_idx + 1), webpage):
+ continue
+
+ entries.append({
+ '_type': 'url',
+ 'url': mobj.group(1)
+ })
+
+ if (len(entries) >= n) or not re.search(r'id="pnnext"', webpage):
+ res['entries'] = entries[:n]
+ return res
diff --git a/hypervideo_dl/extractor/goshgay.py b/hypervideo_dl/extractor/goshgay.py
new file mode 100644
index 0000000..377981d
--- /dev/null
+++ b/hypervideo_dl/extractor/goshgay.py
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+)
+from ..utils import (
+ parse_duration,
+)
+
+
+class GoshgayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?goshgay\.com/video(?P<id>\d+?)($|/)'
+ _TEST = {
+ 'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video',
+ 'md5': '4b6db9a0a333142eb9f15913142b0ed1',
+ 'info_dict': {
+ 'id': '299069',
+ 'ext': 'flv',
+ 'title': 'DIESEL SFW XXX Video',
+ 'thumbnail': r're:^http://.*\.jpg$',
+ 'duration': 80,
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h2>(.*?)<', webpage, 'title')
+ duration = parse_duration(self._html_search_regex(
+ r'<span class="duration">\s*-?\s*(.*?)</span>',
+ webpage, 'duration', fatal=False))
+
+ flashvars = compat_parse_qs(self._html_search_regex(
+ r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
+ webpage, 'flashvars'))
+ thumbnail = flashvars.get('url_bigthumb', [None])[0]
+ video_url = flashvars['flv_url'][0]
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': 18,
+ }
diff --git a/hypervideo_dl/extractor/gputechconf.py b/hypervideo_dl/extractor/gputechconf.py
new file mode 100644
index 0000000..73dc62c
--- /dev/null
+++ b/hypervideo_dl/extractor/gputechconf.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class GPUTechConfIE(InfoExtractor):
+ _VALID_URL = r'https?://on-demand\.gputechconf\.com/gtc/2015/video/S(?P<id>\d+)\.html'
+ _TEST = {
+ 'url': 'http://on-demand.gputechconf.com/gtc/2015/video/S5156.html',
+ 'md5': 'a8862a00a0fd65b8b43acc5b8e33f798',
+ 'info_dict': {
+ 'id': '5156',
+ 'ext': 'mp4',
+ 'title': 'Coordinating More Than 3 Million CUDA Threads for Social Network Analysis',
+ 'duration': 1219,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ root_path = self._search_regex(
+ r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path',
+ default='http://evt.dispeak.com/nvidia/events/gtc15/')
+ xml_file_id = self._search_regex(
+ r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': '%sxml/%s.xml' % (root_path, xml_file_id),
+ 'ie_key': 'DigitallySpeaking',
+ }
diff --git a/hypervideo_dl/extractor/groupon.py b/hypervideo_dl/extractor/groupon.py
new file mode 100644
index 0000000..a6da909
--- /dev/null
+++ b/hypervideo_dl/extractor/groupon.py
@@ -0,0 +1,67 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class GrouponIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?groupon\.com/deals/(?P<id>[^/?#&]+)'
+
+ _TEST = {
+ 'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
+ 'info_dict': {
+ 'id': 'bikram-yoga-huntington-beach-2',
+ 'title': '$49 for 10 Yoga Classes or One Month of Unlimited Classes at Bikram Yoga Huntington Beach ($180 Value)',
+ 'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
+ },
+ 'playlist': [{
+ 'md5': '42428ce8a00585f9bc36e49226eae7a1',
+ 'info_dict': {
+ 'id': 'fk6OhWpXgIQ',
+ 'ext': 'mp4',
+ 'title': 'Bikram Yoga Huntington Beach | Orange County !tubGNycTo@9Uxg82uESj4i61EYX8nyuf',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'duration': 45,
+ 'upload_date': '20160405',
+ 'uploader_id': 'groupon',
+ 'uploader': 'Groupon',
+ },
+ 'add_ie': ['Youtube'],
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ _PROVIDERS = {
+ 'ooyala': ('ooyala:%s', 'Ooyala'),
+ 'youtube': ('%s', 'Youtube'),
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
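+ # The deal page embeds a JSON payload describing the video carousel.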
+ payload = self._parse_json(self._search_regex(
+ r'(?:var\s+|window\.)payload\s*=\s*(.*?);\n', webpage, 'payload'), playlist_id)
+ videos = payload['carousel'].get('dealVideos', [])
+ entries = []
+ for v in videos:
+ provider = v.get('provider')
+ video_id = v.get('media') or v.get('id') or v.get('baseURL')
+ if not provider or not video_id:
+ continue
+ url_pattern, ie_key = self._PROVIDERS.get(provider.lower(), (None, None))
+ if not url_pattern:
+ self.report_warning(
+ '%s: Unsupported video provider %s, skipping video' %
+ (playlist_id, provider))
+ continue
+ entries.append(self.url_result(url_pattern % video_id, ie_key))
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'entries': entries,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ }
diff --git a/hypervideo_dl/extractor/hbo.py b/hypervideo_dl/extractor/hbo.py
new file mode 100644
index 0000000..68df748
--- /dev/null
+++ b/hypervideo_dl/extractor/hbo.py
@@ -0,0 +1,178 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ xpath_element,
+ int_or_none,
+ parse_duration,
+ urljoin,
+)
+
+
+class HBOBaseIE(InfoExtractor):
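+ # Known quality labels mapped to their approximate video dimensions.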
+ _FORMATS_INFO = {
+ 'pro7': {
+ 'width': 1280,
+ 'height': 720,
+ },
+ '1920': {
+ 'width': 1280,
+ 'height': 720,
+ },
+ 'pro6': {
+ 'width': 768,
+ 'height': 432,
+ },
+ '640': {
+ 'width': 768,
+ 'height': 432,
+ },
+ 'pro5': {
+ 'width': 640,
+ 'height': 360,
+ },
+ 'highwifi': {
+ 'width': 640,
+ 'height': 360,
+ },
+ 'high3g': {
+ 'width': 640,
+ 'height': 360,
+ },
+ 'medwifi': {
+ 'width': 400,
+ 'height': 224,
+ },
+ 'med3g': {
+ 'width': 400,
+ 'height': 224,
+ },
+ }
+
+ def _extract_info(self, url, display_id):
+ video_data = self._download_xml(url, display_id)
+ video_id = xpath_text(video_data, 'id', fatal=True)
+ episode_title = title = xpath_text(video_data, 'title', fatal=True)
+ series = xpath_text(video_data, 'program')
+ if series:
+ title = '%s - %s' % (series, title)
+
+ formats = []
+ for source in xpath_element(video_data, 'videos', 'sources', True):
+ if source.tag == 'size':
+ path = xpath_text(source, './/path')
+ if not path:
+ continue
+ width = source.attrib.get('width')
+ format_info = self._FORMATS_INFO.get(width, {})
+ height = format_info.get('height')
+ fmt = {
+ 'url': path,
+ 'format_id': 'http%s' % ('-%dp' % height if height else ''),
+ 'width': format_info.get('width'),
+ 'height': height,
+ }
+ rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
+ if rtmp:
+ fmt.update({
+ 'url': rtmp.group('url'),
+ 'play_path': rtmp.group('playpath'),
+ 'app': rtmp.group('app'),
+ 'ext': 'flv',
+ 'format_id': fmt['format_id'].replace('http', 'rtmp'),
+ })
+ formats.append(fmt)
+ else:
+ video_url = source.text
+ if not video_url:
+ continue
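+ # Manifest URLs are derived from the archive URL by swapping the .tar
+ # suffix for the corresponding HLS/DASH manifest path.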
+ if source.tag == 'tarball':
+ formats.extend(self._extract_m3u8_formats(
+ video_url.replace('.tar', '/base_index_w8.m3u8'),
+ video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ elif source.tag == 'hls':
+ m3u8_formats = self._extract_m3u8_formats(
+ video_url.replace('.tar', '/base_index.m3u8'),
+ video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ for f in m3u8_formats:
+ if f.get('vcodec') == 'none' and not f.get('tbr'):
+ f['tbr'] = int_or_none(self._search_regex(
+ r'-(\d+)k/', f['url'], 'tbr', default=None))
+ formats.extend(m3u8_formats)
+ elif source.tag == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ video_url.replace('.tar', '/manifest.mpd'),
+ video_id, mpd_id='dash', fatal=False))
+ else:
+ format_info = self._FORMATS_INFO.get(source.tag, {})
+ formats.append({
+ 'format_id': 'http-%s' % source.tag,
+ 'url': video_url,
+ 'width': format_info.get('width'),
+ 'height': format_info.get('height'),
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ card_sizes = xpath_element(video_data, 'titleCardSizes')
+ if card_sizes is not None:
+ for size in card_sizes:
+ path = xpath_text(size, 'path')
+ if not path:
+ continue
+ width = int_or_none(size.get('width'))
+ thumbnails.append({
+ 'id': width,
+ 'url': path,
+ 'width': width,
+ })
+
+ subtitles = None
+ caption_url = xpath_text(video_data, 'captionUrl')
+ if caption_url:
+ subtitles = {
+ 'en': [{
+ 'url': caption_url,
+ 'ext': 'ttml'
+ }],
+ }
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
+ 'series': series,
+ 'episode': episode_title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ }
+
+
+class HBOIE(HBOBaseIE):
+ IE_NAME = 'hbo'
+ _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?:video|embed)(?:/[^/]+)*/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'https://www.hbo.com/video/game-of-thrones/seasons/season-8/videos/trailer',
+ 'md5': '8126210656f433c452a21367f9ad85b3',
+ 'info_dict': {
+ 'id': '22113301',
+ 'ext': 'mp4',
+ 'title': 'Game of Thrones - Trailer',
+ },
+ 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ location_path = self._parse_json(self._html_search_regex(
+ r'data-state="({.+?})"', webpage, 'state'), display_id)['video']['locationUrl']
+ return self._extract_info(urljoin(url, location_path), display_id)
diff --git a/hypervideo_dl/extractor/hearthisat.py b/hypervideo_dl/extractor/hearthisat.py
new file mode 100644
index 0000000..18c2520
--- /dev/null
+++ b/hypervideo_dl/extractor/hearthisat.py
@@ -0,0 +1,138 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ HEADRequest,
+ KNOWN_EXTENSIONS,
+ sanitized_Request,
+ str_to_int,
+ urlencode_postdata,
+ urlhandle_detect_ext,
+)
+
+
+class HearThisAtIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
+ _PLAYLIST_URL = 'https://hearthis.at/playlist.php'
+ _TESTS = [{
+ 'url': 'https://hearthis.at/moofi/dr-kreep',
+ 'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
+ 'info_dict': {
+ 'id': '150939',
+ 'ext': 'wav',
+ 'title': 'Moofi - Dr. Kreep',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1421564134,
+ 'description': 'Listen to Dr. Kreep by Moofi on hearthis.at - Modular, Eurorack, Mutable Intruments Braids, Valhalla-DSP',
+ 'upload_date': '20150118',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'duration': 71,
+ 'categories': ['Experimental'],
+ }
+ }, {
+ # 'download' link redirects to the original webpage
+ 'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',
+ 'md5': '5980ceb7c461605d30f1f039df160c6e',
+ 'info_dict': {
+ 'id': '811296',
+ 'ext': 'mp3',
+ 'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!',
+ 'description': 'Listen to DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix! by TwitchSF on hearthis.at - Dance',
+ 'upload_date': '20160328',
+ 'timestamp': 1459186146,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'duration': 4360,
+ 'categories': ['Dance'],
+ },
+ }]
+
+ def _real_extract(self, url):
+ m = re.match(self._VALID_URL, url)
+ display_id = '{artist:s} - {title:s}'.format(**m.groupdict())
+
+ webpage = self._download_webpage(url, display_id)
+ track_id = self._search_regex(
+ r'intTrackId\s*=\s*(\d+)', webpage, 'track ID')
+
+ payload = urlencode_postdata({'tracks[]': track_id})
+ req = sanitized_Request(self._PLAYLIST_URL, payload)
+ req.add_header('Content-type', 'application/x-www-form-urlencoded')
+
+ track = self._download_json(req, track_id, 'Downloading playlist')[0]
+ title = '{artist:s} - {title:s}'.format(**track)
+
+ categories = None
+ if track.get('category'):
+ categories = [track['category']]
+
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
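+ # Plays, likes and comments share the same span markup; fill in the class.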
+ meta_span = r'<span[^>]+class="%s".*?</i>([^<]+)</span>'
+ view_count = str_to_int(self._search_regex(
+ meta_span % 'plays_count', webpage, 'view count', fatal=False))
+ like_count = str_to_int(self._search_regex(
+ meta_span % 'likes_count', webpage, 'like count', fatal=False))
+ comment_count = str_to_int(self._search_regex(
+ meta_span % 'comment_count', webpage, 'comment count', fatal=False))
+ duration = str_to_int(self._search_regex(
+ r'data-length="(\d+)', webpage, 'duration', fatal=False))
+ timestamp = str_to_int(self._search_regex(
+ r'<span[^>]+class="calctime"[^>]+data-time="(\d+)', webpage, 'timestamp', fatal=False))
+
+ formats = []
+ mp3_url = self._search_regex(
+ r'(?s)<a class="player-link"\s+(?:[a-zA-Z0-9_:-]+="[^"]+"\s+)*?data-mp3="([^"]+)"',
+ webpage, 'mp3 URL', fatal=False)
+ if mp3_url:
+ formats.append({
+ 'format_id': 'mp3',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ 'url': mp3_url,
+ })
+ download_path = self._search_regex(
+ r'<a class="[^"]*download_fct[^"]*"\s+href="([^"]+)"',
+ webpage, 'download URL', default=None)
+ if download_path:
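+ # Sniff the real file type with a HEAD request, since the download
+ # link may redirect back to the webpage (see the second test above).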
+ download_url = compat_urlparse.urljoin(url, download_path)
+ ext_req = HEADRequest(download_url)
+ ext_handle = self._request_webpage(
+ ext_req, display_id, note='Determining extension')
+ ext = urlhandle_detect_ext(ext_handle)
+ if ext in KNOWN_EXTENSIONS:
+ formats.append({
+ 'format_id': 'download',
+ 'vcodec': 'none',
+ 'ext': ext,
+ 'url': download_url,
+ 'preference': 2, # Usually better quality
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': track_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'like_count': like_count,
+ 'categories': categories,
+ }
diff --git a/hypervideo_dl/extractor/heise.py b/hypervideo_dl/extractor/heise.py
new file mode 100644
index 0000000..cbe564a
--- /dev/null
+++ b/hypervideo_dl/extractor/heise.py
@@ -0,0 +1,174 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+from .youtube import YoutubeIE
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ NO_DEFAULT,
+ parse_iso8601,
+ smuggle_url,
+ xpath_text,
+)
+
+
+class HeiseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html'
+ _TESTS = [{
+ # kaltura embed
+ 'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html',
+ 'info_dict': {
+ 'id': '1_kkrq94sm',
+ 'ext': 'mp4',
+ 'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone",
+ 'timestamp': 1512734959,
+ 'upload_date': '20171208',
+ 'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # YouTube embed
+ 'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html',
+ 'md5': 'e403d2b43fea8e405e88e3f8623909f1',
+ 'info_dict': {
+ 'id': '6kmWbXleKW4',
+ 'ext': 'mp4',
+ 'title': 'NEU IM SEPTEMBER | Netflix',
+ 'description': 'md5:2131f3c7525e540d5fd841de938bd452',
+ 'upload_date': '20170830',
+ 'uploader': 'Netflix Deutschland, Österreich und Schweiz',
+ 'uploader_id': 'netflixdach',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.heise.de/video/artikel/nachgehakt-Wie-sichert-das-c-t-Tool-Restric-tor-Windows-10-ab-3700244.html',
+ 'info_dict': {
+ 'id': '1_ntrmio2s',
+ 'ext': 'mp4',
+ 'title': "nachgehakt: Wie sichert das c't-Tool Restric'tor Windows 10 ab?",
+ 'description': 'md5:47e8ffb6c46d85c92c310a512d6db271',
+ 'timestamp': 1512470717,
+ 'upload_date': '20171205',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.heise.de/ct/artikel/c-t-uplink-20-8-Staubsaugerroboter-Xiaomi-Vacuum-2-AR-Brille-Meta-2-und-Android-rooten-3959893.html',
+ 'info_dict': {
+ 'id': '1_59mk80sf',
+ 'ext': 'mp4',
+ 'title': "c't uplink 20.8: Staubsaugerroboter Xiaomi Vacuum 2, AR-Brille Meta 2 und Android rooten",
+ 'description': 'md5:f50fe044d3371ec73a8f79fcebd74afc',
+ 'timestamp': 1517567237,
+ 'upload_date': '20180202',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ def extract_title(default=NO_DEFAULT):
+ title = self._html_search_meta(
+ ('fulltitle', 'title'), webpage, default=None)
+ if not title or title == "c't":
+ title = self._search_regex(
+ r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
+ webpage, 'title', default=None)
+ if not title:
+ title = self._html_search_regex(
+ r'<h1[^>]+\bclass=["\']article_page_title[^>]+>(.+?)<',
+ webpage, 'title', default=default)
+ return title
+
+ title = extract_title(default=None)
+ description = self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'description', webpage)
+
+ def _make_kaltura_result(kaltura_url):
+ return {
+ '_type': 'url_transparent',
+ 'url': smuggle_url(kaltura_url, {'source_url': url}),
+ 'ie_key': KalturaIE.ie_key(),
+ 'title': title,
+ 'description': description,
+ }
+
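+ # Prefer embedded players (Kaltura, then YouTube) before falling back
+ # to the jwplayer XML feed below.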
+ kaltura_url = KalturaIE._extract_url(webpage)
+ if kaltura_url:
+ return _make_kaltura_result(kaltura_url)
+
+ kaltura_id = self._search_regex(
+ r'entry-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura id',
+ default=None, group='id')
+ if kaltura_id:
+ return _make_kaltura_result('kaltura:2238431:%s' % kaltura_id)
+
+ yt_urls = YoutubeIE._extract_urls(webpage)
+ if yt_urls:
+ return self.playlist_from_matches(
+ yt_urls, video_id, title, ie=YoutubeIE.ie_key())
+
+ title = extract_title()
+
+ container_id = self._search_regex(
+ r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
+ webpage, 'container ID')
+
+ sequenz_id = self._search_regex(
+ r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
+ webpage, 'sequenz ID')
+
+ doc = self._download_xml(
+ 'http://www.heise.de/videout/feed', video_id, query={
+ 'container': container_id,
+ 'sequenz': sequenz_id,
+ })
+
+ formats = []
+ for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'):
+ label = source_node.attrib['label']
+ height = int_or_none(self._search_regex(
+ r'^(.*?_)?([0-9]+)p$', label, 'height', default=None))
+ video_url = source_node.attrib['file']
+ ext = determine_ext(video_url, '')
+ formats.append({
+ 'url': video_url,
+ 'format_note': label,
+ 'format_id': '%s_%s' % (ext, label),
+ 'height': height,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image')
+ or self._og_search_thumbnail(webpage)),
+ 'timestamp': parse_iso8601(
+ self._html_search_meta('date', webpage)),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/hellporno.py b/hypervideo_dl/extractor/hellporno.py
new file mode 100644
index 0000000..fae4251
--- /dev/null
+++ b/hypervideo_dl/extractor/hellporno.py
@@ -0,0 +1,77 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ merge_dicts,
+ remove_end,
+ unified_timestamp,
+)
+
+
+class HellPornoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
+ 'md5': 'f0a46ebc0bed0c72ae8fe4629f7de5f3',
+ 'info_dict': {
+ 'id': '149116',
+ 'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
+ 'ext': 'mp4',
+ 'title': 'Dixie is posing with naked ass very erotic',
+ 'description': 'md5:9a72922749354edb1c4b6e540ad3d215',
+ 'categories': list,
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'duration': 240,
+ 'timestamp': 1398762720,
+ 'upload_date': '20140429',
+ 'view_count': int,
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'http://hellporno.net/v/186271/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = remove_end(self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
+
+ info = self._parse_html5_media_entries(url, webpage, display_id)[0]
+ self._sort_formats(info['formats'])
+
+ video_id = self._search_regex(
+ (r'chs_object\s*=\s*["\'](\d+)',
+ r'params\[["\']video_id["\']\]\s*=\s*(\d+)'), webpage, 'video id',
+ default=display_id)
+ description = self._search_regex(
+ r'class=["\']desc_video_view_v2[^>]+>([^<]+)', webpage,
+ 'description', fatal=False)
+ categories = [
+ c.strip()
+ for c in self._html_search_meta(
+ 'keywords', webpage, 'categories', default='').split(',')
+ if c.strip()]
+ duration = int_or_none(self._og_search_property(
+ 'video:duration', webpage, fatal=False))
+ timestamp = unified_timestamp(self._og_search_property(
+ 'video:release_date', webpage, fatal=False))
+ view_count = int_or_none(self._search_regex(
+ r'>Views\s+(\d+)', webpage, 'view count', fatal=False))
+
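+ # Combine the HTML5 media formats with the scraped metadata.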
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'categories': categories,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'age_limit': 18,
+ })
diff --git a/hypervideo_dl/extractor/helsinki.py b/hypervideo_dl/extractor/helsinki.py
new file mode 100644
index 0000000..575fb33
--- /dev/null
+++ b/hypervideo_dl/extractor/helsinki.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class HelsinkiIE(InfoExtractor):
+ IE_DESC = 'helsinki.fi'
+ _VALID_URL = r'https?://video\.helsinki\.fi/Arkisto/flash\.php\?id=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://video.helsinki.fi/Arkisto/flash.php?id=20258',
+ 'info_dict': {
+ 'id': '20258',
+ 'ext': 'mp4',
+ 'title': 'Tietotekniikkafoorumi-iltapäivä',
+ 'description': 'md5:f5c904224d43c133225130fe156a5ee0',
+ },
+ 'params': {
+ 'skip_download': True, # RTMP
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ params = self._parse_json(self._html_search_regex(
+ r'(?s)jwplayer\("player"\).setup\((\{.*?\})\);',
+ webpage, 'player code'), video_id, transform_source=js_to_json)
+ formats = [{
+ 'url': s['file'],
+ 'ext': 'mp4',
+ } for s in params['sources']]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage).replace('Video: ', ''),
+ 'description': self._og_search_description(webpage),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/hentaistigma.py b/hypervideo_dl/extractor/hentaistigma.py
new file mode 100644
index 0000000..86a93de
--- /dev/null
+++ b/hypervideo_dl/extractor/hentaistigma.py
@@ -0,0 +1,40 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class HentaiStigmaIE(InfoExtractor):
+ _VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://hentai.animestigma.com/inyouchuu-etsu-bonus/',
+ 'md5': '4e3d07422a68a4cc363d8f57c8bf0d23',
+ 'info_dict': {
+ 'id': 'inyouchuu-etsu-bonus',
+ 'ext': 'mp4',
+ 'title': 'Inyouchuu Etsu Bonus',
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h2[^>]+class="posttitle"[^>]*><a[^>]*>([^<]+)</a>',
+ webpage, 'title')
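+ # The player sits in a wrapper iframe; fetch it to locate the file URL.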
+ wrap_url = self._html_search_regex(
+ r'<iframe[^>]+src="([^"]+mp4)"', webpage, 'wrapper url')
+ wrap_webpage = self._download_webpage(wrap_url, video_id)
+
+ video_url = self._html_search_regex(
+ r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'age_limit': 18,
+ }
diff --git a/hypervideo_dl/extractor/hgtv.py b/hypervideo_dl/extractor/hgtv.py
new file mode 100644
index 0000000..a4f3325
--- /dev/null
+++ b/hypervideo_dl/extractor/hgtv.py
@@ -0,0 +1,41 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class HGTVComShowIE(InfoExtractor):
+ IE_NAME = 'hgtv.com:show'
+ _VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # data-module="video"
+ 'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-season-4-videos',
+ 'info_dict': {
+ 'id': 'flip-or-flop-full-episodes-season-4-videos',
+ 'title': 'Flip or Flop Full Episodes',
+ },
+ 'playlist_mincount': 15,
+ }, {
+ # data-deferred-module="video"
+ 'url': 'http://www.hgtv.com/shows/good-bones/episodes/an-old-victorian-house-gets-a-new-facelift',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
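+ # The player config is JSON inside a <script type="text/x-config"> tag.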
+ config = self._parse_json(
+ self._search_regex(
+ r'(?s)data-(?:deferred-)?module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
+ webpage, 'video config'),
+ display_id)['channels'][0]
+
+ entries = [
+ self.url_result(video['releaseUrl'])
+ for video in config['videos'] if video.get('releaseUrl')]
+
+ return self.playlist_result(
+ entries, display_id, config.get('title'), config.get('description'))
diff --git a/hypervideo_dl/extractor/hidive.py b/hypervideo_dl/extractor/hidive.py
new file mode 100644
index 0000000..f26f802
--- /dev/null
+++ b/hypervideo_dl/extractor/hidive.py
@@ -0,0 +1,120 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class HiDiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<title>[^/]+)/(?P<key>[^/?#&]+)'
+ # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
+ # so disabling geo bypass completely
+ _GEO_BYPASS = False
+ _NETRC_MACHINE = 'hidive'
+ _LOGIN_URL = 'https://www.hidive.com/account/login'
+
+ _TESTS = [{
+ 'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
+ 'info_dict': {
+ 'id': 'the-comic-artist-and-his-assistants/s01e001',
+ 'ext': 'mp4',
+ 'title': 'the-comic-artist-and-his-assistants/s01e001',
+ 'series': 'the-comic-artist-and-his-assistants',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires Authentication',
+ }]
+
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
+ webpage = self._download_webpage(self._LOGIN_URL, None)
+ form = self._search_regex(
+ r'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',
+ webpage, 'login form')
+ data = self._hidden_inputs(form)
+ data.update({
+ 'Email': email,
+ 'Password': password,
+ })
+ self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ title, key = mobj.group('title', 'key')
+ video_id = '%s/%s' % (title, key)
+
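+ # The settings endpoint returns the renditions and any playback restriction.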
+ settings = self._download_json(
+ 'https://www.hidive.com/play/settings', video_id,
+ data=urlencode_postdata({
+ 'Title': title,
+ 'Key': key,
+ 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
+ }))
+
+ restriction = settings.get('restrictionReason')
+ if restriction == 'RegionRestricted':
+ self.raise_geo_restricted()
+
+ if restriction and restriction != 'None':
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, restriction), expected=True)
+
+ formats = []
+ subtitles = {}
+ for rendition_id, rendition in settings['renditions'].items():
+ bitrates = rendition.get('bitrates')
+ if not isinstance(bitrates, dict):
+ continue
+ m3u8_url = url_or_none(bitrates.get('hls'))
+ if not m3u8_url:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='%s-hls' % rendition_id, fatal=False))
+ cc_files = rendition.get('ccFiles')
+ if not isinstance(cc_files, list):
+ continue
+ for cc_file in cc_files:
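+ # Each ccFiles entry is a list: language first, subtitle URL third.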
+ if not isinstance(cc_file, list) or len(cc_file) < 3:
+ continue
+ cc_lang = cc_file[0]
+ cc_url = url_or_none(cc_file[2])
+ if not isinstance(cc_lang, compat_str) or not cc_url:
+ continue
+ subtitles.setdefault(cc_lang, []).append({
+ 'url': cc_url,
+ })
+ self._sort_formats(formats)
+
+ season_number = int_or_none(self._search_regex(
+ r's(\d+)', key, 'season number', default=None))
+ episode_number = int_or_none(self._search_regex(
+ r'e(\d+)', key, 'episode number', default=None))
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ 'series': title,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ }
diff --git a/hypervideo_dl/extractor/historicfilms.py b/hypervideo_dl/extractor/historicfilms.py
new file mode 100644
index 0000000..56343e9
--- /dev/null
+++ b/hypervideo_dl/extractor/historicfilms.py
@@ -0,0 +1,47 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class HistoricFilmsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?historicfilms\.com/(?:tapes/|play)(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.historicfilms.com/tapes/4728',
+ 'md5': 'd4a437aec45d8d796a38a215db064e9a',
+ 'info_dict': {
+ 'id': '4728',
+ 'ext': 'mov',
+ 'title': 'Historic Films: GP-7',
+ 'description': 'md5:1a86a0f3ac54024e419aba97210d959a',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2096,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ tape_id = self._search_regex(
+ [r'class="tapeId"[^>]*>([^<]+)<', r'tapeId\s*:\s*"([^"]+)"'],
+ webpage, 'tape id')
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = self._html_search_meta(
+ 'thumbnailUrl', webpage, 'thumbnails') or self._og_search_thumbnail(webpage)
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration'))
+
+ video_url = 'http://www.historicfilms.com/video/%s_%s_web.mov' % (tape_id, video_id)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ }
diff --git a/hypervideo_dl/extractor/hitbox.py b/hypervideo_dl/extractor/hitbox.py
new file mode 100644
index 0000000..3e5ff26
--- /dev/null
+++ b/hypervideo_dl/extractor/hitbox.py
@@ -0,0 +1,216 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ parse_iso8601,
+ float_or_none,
+ int_or_none,
+ compat_str,
+ determine_ext,
+)
+
+
+class HitboxIE(InfoExtractor):
+ IE_NAME = 'hitbox'
+ _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.hitbox.tv/video/203213',
+ 'info_dict': {
+ 'id': '203213',
+ 'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
+ 'alt_title': 'hitboxlive - Aug 9th #6',
+ 'description': '',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 215.1666,
+ 'resolution': 'HD 720p',
+ 'uploader': 'hitboxlive',
+ 'view_count': int,
+ 'timestamp': 1407576133,
+ 'upload_date': '20140809',
+ 'categories': ['Live Show'],
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
+ 'only_matching': True,
+ }]
+
+ def _extract_metadata(self, url, video_id):
+ thumb_base = 'https://edge.sf.hitbox.tv'
+ metadata = self._download_json(
+ '%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON')
+
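+ # Live streams and VODs use different root keys and date fields.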
+ date = 'media_live_since'
+ media_type = 'livestream'
+ if metadata.get('media_type') == 'video':
+ media_type = 'video'
+ date = 'media_date_added'
+
+ video_meta = metadata.get(media_type, [])[0]
+ title = video_meta.get('media_status')
+ alt_title = video_meta.get('media_title')
+ description = clean_html(
+ video_meta.get('media_description')
+ or video_meta.get('media_description_md'))
+ duration = float_or_none(video_meta.get('media_duration'))
+ uploader = video_meta.get('media_user_name')
+ views = int_or_none(video_meta.get('media_views'))
+ timestamp = parse_iso8601(video_meta.get(date), ' ')
+ categories = [video_meta.get('category_name')]
+ thumbs = [{
+ 'url': thumb_base + video_meta.get('media_thumbnail'),
+ 'width': 320,
+ 'height': 180
+ }, {
+ 'url': thumb_base + video_meta.get('media_thumbnail_large'),
+ 'width': 768,
+ 'height': 432
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'alt_title': alt_title,
+ 'description': description,
+ 'ext': 'mp4',
+ 'thumbnails': thumbs,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'view_count': views,
+ 'timestamp': timestamp,
+ 'categories': categories,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ player_config = self._download_json(
+ 'https://www.smashcast.tv/api/player/config/video/%s' % video_id,
+ video_id, 'Downloading video JSON')
+
+ formats = []
+ for video in player_config['clip']['bitrates']:
+ label = video.get('label')
+ if label == 'Auto':
+ continue
+ video_url = video.get('url')
+ if not video_url:
+ continue
+ bitrate = int_or_none(video.get('bitrate'))
+ if determine_ext(video_url) == 'm3u8':
+ if not video_url.startswith('http'):
+ continue
+ formats.append({
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'tbr': bitrate,
+ 'format_note': label,
+ 'protocol': 'm3u8_native',
+ })
+ else:
+ formats.append({
+ 'url': video_url,
+ 'tbr': bitrate,
+ 'format_note': label,
+ })
+ self._sort_formats(formats)
+
+ metadata = self._extract_metadata(
+ 'https://www.smashcast.tv/api/media/video', video_id)
+ metadata['formats'] = formats
+
+ return metadata
+
+
+class HitboxLiveIE(HitboxIE):
+ IE_NAME = 'hitbox:live'
+ _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.hitbox.tv/dimak',
+ 'info_dict': {
+ 'id': 'dimak',
+ 'ext': 'mp4',
+ 'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
+ 'timestamp': int,
+ 'upload_date': compat_str,
+ 'title': compat_str,
+ 'uploader': 'Dimak',
+ },
+ 'params': {
+ # live
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.smashcast.tv/dimak',
+ 'only_matching': True,
+ }]
+
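+ # Defer video URLs to HitboxIE; anything else is treated as a live channel.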
+ @classmethod
+ def suitable(cls, url):
+ return False if HitboxIE.suitable(url) else super(HitboxLiveIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ player_config = self._download_json(
+ 'https://www.smashcast.tv/api/player/config/live/%s' % video_id,
+ video_id)
+
+ formats = []
+ cdns = player_config.get('cdns')
+ servers = []
+ for cdn in cdns:
+ # Subscribe URLs are not playable
+ if cdn.get('rtmpSubscribe') is True:
+ continue
+ base_url = cdn.get('netConnectionUrl')
+ host = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1)
+ if base_url not in servers:
+ servers.append(base_url)
+ for stream in cdn.get('bitrates'):
+ label = stream.get('label')
+ if label == 'Auto':
+ continue
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ bitrate = int_or_none(stream.get('bitrate'))
+ if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
+ if not stream_url.startswith('http'):
+ continue
+ formats.append({
+ 'url': stream_url,
+ 'ext': 'mp4',
+ 'tbr': bitrate,
+ 'format_note': label,
+ 'rtmp_live': True,
+ })
+ else:
+ formats.append({
+ 'url': '%s/%s' % (base_url, stream_url),
+ 'ext': 'mp4',
+ 'tbr': bitrate,
+ 'rtmp_live': True,
+ 'format_note': host,
+ 'page_url': url,
+ 'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
+ })
+ self._sort_formats(formats)
+
+ metadata = self._extract_metadata(
+ 'https://www.smashcast.tv/api/media/live', video_id)
+ metadata['formats'] = formats
+ metadata['is_live'] = True
+ metadata['title'] = self._live_title(metadata.get('title'))
+
+ return metadata
diff --git a/hypervideo_dl/extractor/hitrecord.py b/hypervideo_dl/extractor/hitrecord.py
new file mode 100644
index 0000000..fd5dc29
--- /dev/null
+++ b/hypervideo_dl/extractor/hitrecord.py
@@ -0,0 +1,69 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ float_or_none,
+ int_or_none,
+ try_get,
+)
+
+
+class HitRecordIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hitrecord\.org/records/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://hitrecord.org/records/2954362',
+ 'md5': 'fe1cdc2023bce0bbb95c39c57426aa71',
+ 'info_dict': {
+ 'id': '2954362',
+ 'ext': 'mp4',
+ 'title': 'A Very Different World (HITRECORD x ACLU)',
+ 'description': 'md5:e62defaffab5075a5277736bead95a3d',
+ 'duration': 139.327,
+ 'timestamp': 1471557582,
+ 'upload_date': '20160818',
+ 'uploader': 'Zuzi.C12',
+ 'uploader_id': '362811',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'tags': list,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://hitrecord.org/api/web/records/%s' % video_id, video_id)
+
+ title = video['title']
+ video_url = video['source_url']['mp4_url']
+
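+ # Tags come as a list of dicts; keep only entries with string text.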
+ tags = None
+ tags_list = try_get(video, lambda x: x['tags'], list)
+ if tags_list:
+ tags = [
+ t['text']
+ for t in tags_list
+ if isinstance(t, dict) and t.get('text')
+ and isinstance(t['text'], compat_str)]
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': clean_html(video.get('body')),
+ 'duration': float_or_none(video.get('duration'), 1000),
+ 'timestamp': int_or_none(video.get('created_at_i')),
+ 'uploader': try_get(
+ video, lambda x: x['user']['username'], compat_str),
+ 'uploader_id': try_get(
+ video, lambda x: compat_str(x['user']['id'])),
+ 'view_count': int_or_none(video.get('total_views_count')),
+ 'like_count': int_or_none(video.get('hearts_count')),
+ 'comment_count': int_or_none(video.get('comments_count')),
+ 'tags': tags,
+ }
diff --git a/hypervideo_dl/extractor/hketv.py b/hypervideo_dl/extractor/hketv.py
new file mode 100644
index 0000000..1f3502b
--- /dev/null
+++ b/hypervideo_dl/extractor/hketv.py
@@ -0,0 +1,192 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+ parse_count,
+ str_or_none,
+ try_get,
+ unified_strdate,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class HKETVIE(InfoExtractor):
+ IE_NAME = 'hketv'
+ IE_DESC = '香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau'
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['HK']
+ _VALID_URL = r'https?://(?:www\.)?hkedcity\.net/etv/resource/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.hkedcity.net/etv/resource/2932360618',
+ 'md5': 'f193712f5f7abb208ddef3c5ea6ed0b7',
+ 'info_dict': {
+ 'id': '2932360618',
+ 'ext': 'mp4',
+ 'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)',
+ 'description': 'md5:d5286d05219ef50e0613311cbe96e560',
+ 'upload_date': '20181024',
+ 'duration': 900,
+ 'subtitles': 'count:2',
+ },
+ 'skip': 'Geo restricted to HK',
+ }, {
+ 'url': 'https://www.hkedcity.net/etv/resource/972641418',
+ 'md5': '1ed494c1c6cf7866a8290edad9b07dc9',
+ 'info_dict': {
+ 'id': '972641418',
+ 'ext': 'mp4',
+ 'title': '衣冠楚楚 (天使系列之一)',
+ 'description': 'md5:10bb3d659421e74f58e5db5691627b0f',
+ 'upload_date': '20070109',
+ 'duration': 907,
+ 'subtitles': {},
+ },
+ 'params': {
+ 'geo_verification_proxy': '<HK proxy here>',
+ },
+ 'skip': 'Geo restricted to HK',
+ }]
+
+ _CC_LANGS = {
+ '中文(繁體中文)': 'zh-Hant',
+ '中文(简体中文)': 'zh-Hans',
+ 'English': 'en',
+ 'Bahasa Indonesia': 'id',
+ '\u0939\u093f\u0928\u094d\u0926\u0940': 'hi',
+ '\u0928\u0947\u092a\u093e\u0932\u0940': 'ne',
+ 'Tagalog': 'tl',
+ '\u0e44\u0e17\u0e22': 'th',
+ '\u0627\u0631\u062f\u0648': 'ur',
+ }
+ _FORMAT_HEIGHTS = {
+ 'SD': 360,
+ 'HD': 720,
+ }
+ _APPS_BASE_URL = 'https://apps.hkedcity.net'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = (
+ self._html_search_meta(
+ ('ed_title', 'search.ed_title'), webpage, default=None)
+ or self._search_regex(
+ r'data-favorite_title_(?:eng|chi)=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'title', default=None, group='id')
+ or self._html_search_regex(
+ r'<h1>([^<]+)</h1>', webpage, 'title', default=None)
+ or self._og_search_title(webpage)
+ )
+
+ file_id = self._search_regex(
+ r'post_var\[["\']file_id["\']\s*\]\s*=\s*(.+?);',
+ webpage, 'file ID')
+ curr_url = self._search_regex(
+ r'post_var\[["\']curr_url["\']\s*\]\s*=\s*"(.+?)";',
+ webpage, 'curr URL')
+ data = {
+ 'action': 'get_info',
+ 'curr_url': curr_url,
+ 'file_id': file_id,
+ 'video_url': file_id,
+ }
+
+ response = self._download_json(
+ self._APPS_BASE_URL + '/media/play/handler.php', video_id,
+ data=urlencode_postdata(data),
+ headers=merge_dicts({
+ 'Content-Type': 'application/x-www-form-urlencoded'},
+ self.geo_verification_headers()))
+
+ result = response['result']
+
+ if not response.get('success') or not response.get('access'):
+ error = clean_html(response.get('access_err_msg'))
+ if 'Video streaming is not available in your country' in error:
+ self.raise_geo_restricted(
+ msg=error, countries=self._GEO_COUNTRIES)
+ else:
+ raise ExtractorError(error, expected=True)
+
+ formats = []
+
+ width = int_or_none(result.get('width'))
+ height = int_or_none(result.get('height'))
+
+ playlist0 = result['playlist'][0]
+ for fmt in playlist0['sources']:
+ file_url = urljoin(self._APPS_BASE_URL, fmt.get('file'))
+ if not file_url:
+ continue
+ # If we ever wanted to provide the final resolved URL that
+ # does not require cookies, albeit with a shorter lifespan:
+ # urlh = self._downloader.urlopen(file_url)
+ # resolved_url = urlh.geturl()
+ label = fmt.get('label')
+ h = self._FORMAT_HEIGHTS.get(label)
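+ # Derive the width for the labelled height from the source aspect ratio.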
+ w = h * width // height if h and width and height else None
+ formats.append({
+ 'format_id': label,
+ 'ext': fmt.get('type'),
+ 'url': file_url,
+ 'width': w,
+ 'height': h,
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ tracks = try_get(playlist0, lambda x: x['tracks'], list) or []
+ for track in tracks:
+ if not isinstance(track, dict):
+ continue
+ track_kind = str_or_none(track.get('kind'))
+ if not track_kind or not isinstance(track_kind, compat_str):
+ continue
+ if track_kind.lower() not in ('captions', 'subtitles'):
+ continue
+ track_url = urljoin(self._APPS_BASE_URL, track.get('file'))
+ if not track_url:
+ continue
+ track_label = track.get('label')
+ subtitles.setdefault(self._CC_LANGS.get(
+ track_label, track_label), []).append({
+ 'url': self._proto_relative_url(track_url),
+ 'ext': 'srt',
+ })
+
+ # Likes
+ emotion = self._download_json(
+ 'https://emocounter.hkedcity.net/handler.php', video_id,
+ data=urlencode_postdata({
+ 'action': 'get_emotion',
+ 'data[bucket_id]': 'etv',
+ 'data[identifier]': video_id,
+ }),
+ headers={'Content-Type': 'application/x-www-form-urlencoded'},
+ fatal=False) or {}
+ like_count = int_or_none(try_get(
+ emotion, lambda x: x['data']['emotion_data'][0]['count']))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': self._html_search_meta(
+ 'description', webpage, fatal=False),
+ 'upload_date': unified_strdate(self._html_search_meta(
+ 'ed_date', webpage, fatal=False), day_first=False),
+ 'duration': int_or_none(result.get('length')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': urljoin(self._APPS_BASE_URL, result.get('image')),
+ 'view_count': parse_count(result.get('view_count')),
+ 'like_count': like_count,
+ }
diff --git a/hypervideo_dl/extractor/hornbunny.py b/hypervideo_dl/extractor/hornbunny.py
new file mode 100644
index 0000000..c458a95
--- /dev/null
+++ b/hypervideo_dl/extractor/hornbunny.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+)
+
+
+class HornBunnyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
+ _TEST = {
+ 'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html',
+ 'md5': 'e20fd862d1894b67564c96f180f43924',
+ 'info_dict': {
+ 'id': '5227',
+ 'ext': 'mp4',
+ 'title': 'panty slut jerk off instruction',
+ 'duration': 550,
+ 'age_limit': 18,
+ 'view_count': int,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(webpage)
+ info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
+
+ duration = parse_duration(self._search_regex(
+ r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
+ webpage, 'duration', fatal=False))
+ view_count = int_or_none(self._search_regex(
+ r'<strong>Views:</strong>\s*(\d+)</div>',
+ webpage, 'view count', fatal=False))
+
+ info_dict.update({
+ 'id': video_id,
+ 'title': title,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'age_limit': 18,
+ })
+
+ return info_dict
diff --git a/hypervideo_dl/extractor/hotnewhiphop.py b/hypervideo_dl/extractor/hotnewhiphop.py
new file mode 100644
index 0000000..4703e18
--- /dev/null
+++ b/hypervideo_dl/extractor/hotnewhiphop.py
@@ -0,0 +1,68 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_b64decode
+from ..utils import (
+ ExtractorError,
+ HEADRequest,
+ sanitized_Request,
+ urlencode_postdata,
+)
+
+
+class HotNewHipHopIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
+ _TEST = {
+ 'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
+ 'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
+ 'info_dict': {
+ 'id': '1435540',
+ 'ext': 'mp3',
+ 'title': 'Freddie Gibbs - Lay It Down'
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
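+ # The media URL is base64-encoded in data-path; pages without it
+ # point to a YouTube video via contentUrl instead.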
+ video_url_base64 = self._search_regex(
+ r'data-path="(.*?)"', webpage, 'video URL', default=None)
+
+ if video_url_base64 is None:
+ video_url = self._search_regex(
+ r'"contentUrl" content="(.*?)"', webpage, 'content URL')
+ return self.url_result(video_url, ie='Youtube')
+
+ reqdata = urlencode_postdata([
+ ('mediaType', 's'),
+ ('mediaId', video_id),
+ ])
+ r = sanitized_Request(
+ 'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
+ r.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ mkd = self._download_json(
+ r, video_id, note='Requesting media key',
+ errnote='Could not download media key')
+ if 'mediaKey' not in mkd:
+ raise ExtractorError('Did not get a media key')
+
+ redirect_url = compat_b64decode(video_url_base64).decode('utf-8')
+ redirect_req = HEADRequest(redirect_url)
+ req = self._request_webpage(
+ redirect_req, video_id,
+ note='Resolving final URL', errnote='Could not resolve final URL')
+ video_url = req.geturl()
+ if video_url.endswith('.html'):
+ raise ExtractorError('Redirect failed')
+
+ video_title = self._og_search_title(webpage).strip()
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': video_title,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
diff --git a/hypervideo_dl/extractor/hotstar.py b/hypervideo_dl/extractor/hotstar.py
new file mode 100644
index 0000000..1620822
--- /dev/null
+++ b/hypervideo_dl/extractor/hotstar.py
@@ -0,0 +1,256 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import hmac
+import json
+import re
+import time
+import uuid
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+ url_or_none,
+)
+
+
+class HotStarBaseIE(InfoExtractor):
+ _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
+
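+ # Requests are signed with an Akamai-style token: an st/exp/acl string
+ # plus its HMAC-SHA256, keyed with the constant above.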
+ def _call_api_impl(self, path, video_id, headers, query, data=None):
+ st = int(time.time())
+ exp = st + 6000
+ auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
+ auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
+ h = {'hotstarauth': auth}
+ h.update(headers)
+ return self._download_json(
+ 'https://api.hotstar.com/' + path,
+ video_id, headers=h, query=query, data=data)
+
+ def _call_api(self, path, video_id, query_name='contentId'):
+ response = self._call_api_impl(path, video_id, {
+ 'x-country-code': 'IN',
+ 'x-platform-code': 'JIO',
+ }, {
+ query_name: video_id,
+ 'tas': 10000,
+ })
+ if response['statusCode'] != 'OK':
+ raise ExtractorError(
+ response['body']['message'], expected=True)
+ return response['body']['results']
+
+ def _call_api_v2(self, path, video_id, headers, query=None, data=None):
+ h = {'X-Request-Id': compat_str(uuid.uuid4())}
+ h.update(headers)
+ try:
+ return self._call_api_impl(
+ path, video_id, h, query, data)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ if e.cause.code == 402:
+ self.raise_login_required()
+ message = self._parse_json(e.cause.read().decode(), video_id)['message']
+ if message in ('Content not available in region', 'Country is not supported'):
+ raise self.raise_geo_restricted(message)
+ raise ExtractorError(message)
+ raise e
+
+
+class HotStarIE(HotStarBaseIE):
+ IE_NAME = 'hotstar'
+ _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+[/-])?(?P<id>\d{10})'
+ _TESTS = [{
+ # contentData
+ 'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
+ 'info_dict': {
+ 'id': '1000076273',
+ 'ext': 'mp4',
+ 'title': 'Can You Not Spread Rumours?',
+ 'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
+ 'timestamp': 1447248600,
+ 'upload_date': '20151111',
+ 'duration': 381,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # contentDetail
+ 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.hotstar.com/1000000515',
+ 'only_matching': True,
+ }, {
+ # only available via api v2
+ 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hotstar.com/in/tv/start-music/1260005217/cooks-vs-comalis/1100039717',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+ _DEVICE_ID = None
+ _USER_TOKEN = None
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ app_state = self._parse_json(self._search_regex(
+ r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
+ webpage, 'app state'), video_id)
+ video_data = {}
+ getters = list(
+ lambda x, k=k: x['initialState']['content%s' % k]['content']
+ for k in ('Data', 'Detail')
+ )
+ for v in app_state.values():
+ content = try_get(v, getters, dict)
+ if content and content.get('contentId') == video_id:
+ video_data = content
+ break
+
+ title = video_data['title']
+
+ if video_data.get('drmProtected'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
+ headers = {'Referer': url}
+ formats = []
+ geo_restricted = False
+
+ if not self._USER_TOKEN:
+ self._DEVICE_ID = compat_str(uuid.uuid4())
+ self._USER_TOKEN = self._call_api_v2('um/v3/users', video_id, {
+ 'X-HS-Platform': 'PCTV',
+ 'Content-Type': 'application/json',
+ }, data=json.dumps({
+ 'device_ids': [{
+ 'id': self._DEVICE_ID,
+ 'type': 'device_id',
+ }],
+ }).encode())['user_identity']
+
+ playback_sets = self._call_api_v2(
+ 'play/v2/playback/content/' + video_id, video_id, {
+ 'X-HS-Platform': 'web',
+ 'X-HS-AppVersion': '6.99.1',
+ 'X-HS-UserToken': self._USER_TOKEN,
+ }, query={
+ 'device-id': self._DEVICE_ID,
+ 'desired-config': 'encryption:plain',
+ 'os-name': 'Windows',
+ 'os-version': '10',
+ })['data']['playBackSets']
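+ # Each playback set advertises its packaging and encryption via
+ # tagsCombination; only plain (non-DRM) streams are usable.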
+ for playback_set in playback_sets:
+ if not isinstance(playback_set, dict):
+ continue
+ format_url = url_or_none(playback_set.get('playbackUrl'))
+ if not format_url:
+ continue
+ format_url = re.sub(
+ r'(?<=//staragvod)(\d)', r'web\1', format_url)
+ tags = str_or_none(playback_set.get('tagsCombination')) or ''
+ if tags and 'encryption:plain' not in tags:
+ continue
+ ext = determine_ext(format_url)
+ try:
+ if 'package:hls' in tags or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native',
+ m3u8_id='hls', headers=headers))
+ elif 'package:dash' in tags or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', headers=headers))
+ elif ext == 'f4m':
+ # produce broken files
+ pass
+ else:
+ formats.append({
+ 'url': format_url,
+ 'width': int_or_none(playback_set.get('width')),
+ 'height': int_or_none(playback_set.get('height')),
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ geo_restricted = True
+ continue
+ if not formats and geo_restricted:
+ self.raise_geo_restricted(countries=['IN'])
+ self._sort_formats(formats)
+
+ for f in formats:
+ f.setdefault('http_headers', {}).update(headers)
+
+ image = try_get(video_data, lambda x: x['image']['h'], compat_str)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': 'https://img1.hotstarext.com/image/upload/' + image if image else None,
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
+ 'formats': formats,
+ 'channel': video_data.get('channelName'),
+ 'channel_id': str_or_none(video_data.get('channelId')),
+ 'series': video_data.get('showName'),
+ 'season': video_data.get('seasonName'),
+ 'season_number': int_or_none(video_data.get('seasonNo')),
+ 'season_id': str_or_none(video_data.get('seasonId')),
+ 'episode': title,
+ 'episode_number': int_or_none(video_data.get('episodeNo')),
+ }
+
+
+class HotStarPlaylistIE(HotStarBaseIE):
+ IE_NAME = 'hotstar:playlist'
+ _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:[a-z]{2}/)?tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
+ 'info_dict': {
+ 'id': '3_2_26',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hotstar.com/us/tv/masterchef-india/s-830/list/episodes/t-1_2_830',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')
+
+ entries = [
+ self.url_result(
+ 'https://www.hotstar.com/%s' % video['contentId'],
+ ie=HotStarIE.ie_key(), video_id=video['contentId'])
+ for video in collection['assets']['items']
+ if video.get('contentId')]
+
+ return self.playlist_result(entries, playlist_id)
diff --git a/hypervideo_dl/extractor/howcast.py b/hypervideo_dl/extractor/howcast.py
new file mode 100644
index 0000000..7e36b85
--- /dev/null
+++ b/hypervideo_dl/extractor/howcast.py
@@ -0,0 +1,43 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import parse_iso8601
+
+
+class HowcastIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
+ 'md5': '7d45932269a288149483144f01b99789',
+ 'info_dict': {
+ 'id': '390161',
+ 'ext': 'mp4',
+ 'title': 'How to Tie a Square Knot Properly',
+ 'description': 'md5:dbe792e5f6f1489027027bf2eba188a3',
+ 'timestamp': 1276081287,
+ 'upload_date': '20100609',
+ 'duration': 56.823,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Ooyala'],
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ embed_code = self._search_regex(
+ r'<iframe[^>]+src="[^"]+\bembed_code=([^\b]+)\b',
+ webpage, 'ooyala embed code')
+
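+ # Delegate playback to the Ooyala extractor, adding the page timestamp.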
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'Ooyala',
+ 'url': 'ooyala:%s' % embed_code,
+ 'id': video_id,
+ 'timestamp': parse_iso8601(self._html_search_meta(
+ 'article:published_time', webpage, 'timestamp')),
+ }
diff --git a/hypervideo_dl/extractor/howstuffworks.py b/hypervideo_dl/extractor/howstuffworks.py
new file mode 100644
index 0000000..cf90ab3
--- /dev/null
+++ b/hypervideo_dl/extractor/howstuffworks.py
@@ -0,0 +1,91 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ find_xpath_attr,
+ int_or_none,
+ js_to_json,
+ unescapeHTML,
+ determine_ext,
+)
+
+
+class HowStuffWorksIE(InfoExtractor):
+ _VALID_URL = r'https?://[\da-z-]+\.(?:howstuffworks|stuff(?:(?:youshould|theydontwantyouto)know|toblowyourmind|momnevertoldyou)|(?:brain|car)stuffshow|fwthinking|geniusstuff)\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
+ _TESTS = [
+ {
+ 'url': 'http://www.stufftoblowyourmind.com/videos/optical-illusions-video.htm',
+ 'md5': '76646a5acc0c92bf7cd66751ca5db94d',
+ 'info_dict': {
+ 'id': '855410',
+ 'ext': 'mp4',
+ 'title': 'Your Trickster Brain: Optical Illusions -- Science on the Web',
+ 'description': 'md5:e374ff9561f6833ad076a8cc0a5ab2fb',
+ },
+ },
+ {
+ 'url': 'http://shows.howstuffworks.com/more-shows/why-does-balloon-stick-to-hair-video.htm',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ clip_js = self._search_regex(
+ r'(?s)var clip = ({.*?});', webpage, 'clip info')
+ clip_info = self._parse_json(
+ clip_js, display_id, transform_source=js_to_json)
+
+ video_id = clip_info['content_id']
+ formats = []
+ m3u8_url = clip_info.get('m3u8')
+ if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', format_id='hls', fatal=True))
+ flv_url = clip_info.get('flv_url')
+ if flv_url:
+ formats.append({
+ 'url': flv_url,
+ 'format_id': 'flv',
+ })
+ for video in clip_info.get('mp4', []):
+ formats.append({
+ 'url': video['src'],
+ 'format_id': 'mp4-%s' % video['bitrate'],
+ 'vbr': int_or_none(video['bitrate'].rstrip('k')),
+ })
+
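+ # Fall back to the SMIL service when the clip JSON yields no formats.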
+ if not formats:
+ smil = self._download_xml(
+ 'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % video_id,
+ video_id, 'Downloading video SMIL')
+
+ http_base = find_xpath_attr(
+ smil,
+ './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
+ 'name',
+ 'httpBase').get('content')
+
+ URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A'
+
+ for video in smil.findall(
+ './{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
+ vbr = int_or_none(video.attrib['system-bitrate'], scale=1000)
+ formats.append({
+ 'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX),
+ 'format_id': '%dk' % vbr,
+ 'vbr': vbr,
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': '%s' % video_id,
+ 'display_id': display_id,
+ 'title': unescapeHTML(clip_info['clip_title']),
+ 'description': unescapeHTML(clip_info.get('caption')),
+ 'thumbnail': clip_info.get('video_still_url'),
+ 'duration': int_or_none(clip_info.get('duration')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/hrti.py b/hypervideo_dl/extractor/hrti.py
new file mode 100644
index 0000000..23f7b1f
--- /dev/null
+++ b/hypervideo_dl/extractor/hrti.py
@@ -0,0 +1,209 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+ sanitized_Request,
+ try_get,
+)
+
+
+class HRTiBaseIE(InfoExtractor):
+ """
+    Base information extractor for the Croatian Radiotelevision
+    video on demand site https://hrti.hrt.hr.
+    Reverse engineered from the JavaScript app in app.min.js.
+ """
+ _NETRC_MACHINE = 'hrti'
+
+ _APP_LANGUAGE = 'hr'
+ _APP_VERSION = '1.1'
+ _APP_PUBLICATION_ID = 'all_in_one'
+ _API_URL = 'http://clientapi.hrt.hr/client_api.php/config/identify/format/json'
+
+ def _initialize_api(self):
+ init_data = {
+ 'application_publication_id': self._APP_PUBLICATION_ID
+ }
+
+ uuid = self._download_json(
+ self._API_URL, None, note='Downloading uuid',
+ errnote='Unable to download uuid',
+ data=json.dumps(init_data).encode('utf-8'))['uuid']
+
+ app_data = {
+ 'uuid': uuid,
+ 'application_publication_id': self._APP_PUBLICATION_ID,
+ 'application_version': self._APP_VERSION
+ }
+
+ req = sanitized_Request(self._API_URL, data=json.dumps(app_data).encode('utf-8'))
+ req.get_method = lambda: 'PUT'
+
+ resources = self._download_json(
+ req, None, note='Downloading session information',
+ errnote='Unable to download session information')
+
+ self._session_id = resources['session_id']
+
+ modules = resources['modules']
+
+ self._search_url = modules['vod_catalog']['resources']['search']['uri'].format(
+ language=self._APP_LANGUAGE,
+ application_id=self._APP_PUBLICATION_ID)
+
+ self._login_url = (modules['user']['resources']['login']['uri']
+ + '/format/json').format(session_id=self._session_id)
+
+ self._logout_url = modules['user']['resources']['logout']['uri']
+
+ def _login(self):
+ username, password = self._get_login_info()
+ # TODO: figure out authentication with cookies
+ if username is None or password is None:
+ self.raise_login_required()
+
+ auth_data = {
+ 'username': username,
+ 'password': password,
+ }
+
+ try:
+ auth_info = self._download_json(
+ self._login_url, None, note='Logging in', errnote='Unable to log in',
+ data=json.dumps(auth_data).encode('utf-8'))
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 406:
+                auth_info = self._parse_json(e.cause.read().decode('utf-8'), None)
+ else:
+ raise
+
+ error_message = auth_info.get('error', {}).get('message')
+ if error_message:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error_message),
+ expected=True)
+
+ self._token = auth_info['secure_streaming_token']
+
+ def _real_initialize(self):
+ self._initialize_api()
+ self._login()
+
+
+class HRTiIE(HRTiBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ hrti:(?P<short_id>[0-9]+)|
+ https?://
+ hrti\.hrt\.hr/(?:\#/)?video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://hrti.hrt.hr/#/video/show/2181385/republika-dokumentarna-serija-16-hd',
+ 'info_dict': {
+ 'id': '2181385',
+ 'display_id': 'republika-dokumentarna-serija-16-hd',
+ 'ext': 'mp4',
+ 'title': 'REPUBLIKA, dokumentarna serija (1/6) (HD)',
+ 'description': 'md5:48af85f620e8e0e1df4096270568544f',
+ 'duration': 2922,
+ 'view_count': int,
+ 'average_rating': int,
+ 'episode_number': int,
+ 'season_number': int,
+ 'age_limit': 12,
+ },
+ 'skip': 'Requires account credentials',
+ }, {
+ 'url': 'https://hrti.hrt.hr/#/video/show/2181385/',
+ 'only_matching': True,
+ }, {
+ 'url': 'hrti:2181385',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://hrti.hrt.hr/video/show/3873068/cuvar-dvorca-dramska-serija-14',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('short_id') or mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ video = self._download_json(
+ '%s/video_id/%s/format/json' % (self._search_url, video_id),
+ display_id, 'Downloading video metadata JSON')['video'][0]
+
+ title_info = video['title']
+ title = title_info['title_long']
+
+ movie = video['video_assets']['movie'][0]
+ m3u8_url = movie['url'].format(TOKEN=self._token)
+ formats = self._extract_m3u8_formats(
+ m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ self._sort_formats(formats)
+
+ description = clean_html(title_info.get('summary_long'))
+ age_limit = parse_age_limit(video.get('parental_control', {}).get('rating'))
+ view_count = int_or_none(video.get('views'))
+ average_rating = int_or_none(video.get('user_rating'))
+ duration = int_or_none(movie.get('duration'))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'average_rating': average_rating,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
+
+
+class HRTiPlaylistIE(HRTiBaseIE):
+ _VALID_URL = r'https?://hrti\.hrt\.hr/(?:#/)?video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
+ _TESTS = [{
+ 'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena',
+ 'info_dict': {
+ 'id': '212',
+ 'title': 'ekumena',
+ },
+ 'playlist_mincount': 8,
+ 'skip': 'Requires account credentials',
+ }, {
+ 'url': 'https://hrti.hrt.hr/#/video/list/category/212/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://hrti.hrt.hr/video/list/category/212/ekumena',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ category_id = mobj.group('id')
+ display_id = mobj.group('display_id') or category_id
+
+ response = self._download_json(
+ '%s/category_id/%s/format/json' % (self._search_url, category_id),
+ display_id, 'Downloading video metadata JSON')
+
+ video_ids = try_get(
+ response, lambda x: x['video_listings'][0]['alternatives'][0]['list'],
+ list) or [video['id'] for video in response.get('videos', []) if video.get('id')]
+
+ entries = [self.url_result('hrti:%s' % video_id) for video_id in video_ids]
+
+ return self.playlist_result(entries, category_id, display_id)
diff --git a/hypervideo_dl/extractor/huajiao.py b/hypervideo_dl/extractor/huajiao.py
new file mode 100644
index 0000000..4ca275d
--- /dev/null
+++ b/hypervideo_dl/extractor/huajiao.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class HuajiaoIE(InfoExtractor):
+ IE_DESC = '花椒直播'
+ _VALID_URL = r'https?://(?:www\.)?huajiao\.com/l/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.huajiao.com/l/38941232',
+ 'md5': 'd08bf9ac98787d24d1e4c0283f2d372d',
+ 'info_dict': {
+ 'id': '38941232',
+ 'ext': 'mp4',
+ 'title': '#新人求关注#',
+ 'description': 're:.*',
+ 'duration': 2424.0,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1475866459,
+ 'upload_date': '20161007',
+ 'uploader': 'Penny_余姿昀',
+ 'uploader_id': '75206005',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ feed_json = self._search_regex(
+ r'var\s+feed\s*=\s*({.+})', webpage, 'feed json')
+ feed = self._parse_json(feed_json, video_id)
+
+ description = self._html_search_meta(
+ 'description', webpage, 'description', fatal=False)
+
+ def get(section, field):
+ return feed.get(section, {}).get(field)
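+        # e.g. get('author', 'nickname') safely reads feed['author']['nickname'],
+        # returning None when either key is missing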
+
+ return {
+ 'id': video_id,
+ 'title': feed['feed']['formated_title'],
+ 'description': description,
+ 'duration': parse_duration(get('feed', 'duration')),
+ 'thumbnail': get('feed', 'image'),
+ 'timestamp': parse_iso8601(feed.get('creatime'), ' '),
+ 'uploader': get('author', 'nickname'),
+ 'uploader_id': get('author', 'uid'),
+ 'formats': self._extract_m3u8_formats(
+ feed['feed']['m3u8'], video_id, 'mp4', 'm3u8_native'),
+ }
diff --git a/hypervideo_dl/extractor/huffpost.py b/hypervideo_dl/extractor/huffpost.py
new file mode 100644
index 0000000..97e36f0
--- /dev/null
+++ b/hypervideo_dl/extractor/huffpost.py
@@ -0,0 +1,96 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ parse_duration,
+ unified_strdate,
+)
+
+
+class HuffPostIE(InfoExtractor):
+ IE_DESC = 'Huffington Post'
+ _VALID_URL = r'''(?x)
+        https?://(?:embed\.)?live\.huffingtonpost\.com/
+ (?:
+ r/segment/[^/]+/|
+ HPLEmbedPlayer/\?segmentId=
+ )
+ (?P<id>[0-9a-f]+)'''
+
+ _TEST = {
+ 'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
+ 'md5': '55f5e8981c1c80a64706a44b74833de8',
+ 'info_dict': {
+ 'id': '52dd3e4b02a7602131000677',
+ 'ext': 'mp4',
+ 'title': 'Legalese It! with @MikeSacksHP',
+ 'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
+ 'duration': 1549,
+ 'upload_date': '20140124',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['HTTP Error 404: Not Found'],
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
+ data = self._download_json(api_url, video_id)['data']
+
+ video_title = data['title']
+ duration = parse_duration(data.get('running_time'))
+ upload_date = unified_strdate(
+ data.get('schedule', {}).get('starts_at') or data.get('segment_start_date_time'))
+ description = data.get('description')
+
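+        # Thumbnail URLs encode their resolution in the file name; e.g. a
+        # hypothetical '.../still-640x360.jpg' yields resolution '640x360'.
+        # URLs that do not match the pattern are skipped.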
+ thumbnails = []
+ for url in filter(None, data['images'].values()):
+ m = re.match(r'.*-([0-9]+x[0-9]+)\.', url)
+ if not m:
+ continue
+ thumbnails.append({
+ 'url': url,
+ 'resolution': m.group(1),
+ })
+
+ formats = []
+ sources = data.get('sources', {})
+ live_sources = list(sources.get('live', {}).items()) + list(sources.get('live_again', {}).items())
+ for key, url in live_sources:
+ ext = determine_ext(url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ url + '?hdcore=2.9.5', video_id, f4m_id='hds', fatal=False))
+ else:
+ formats.append({
+ 'format': key,
+ 'format_id': key.replace('/', '.'),
+ 'ext': 'mp4',
+ 'url': url,
+ 'vcodec': 'none' if key.startswith('audio/') else None,
+ })
+
+ if not formats and data.get('fivemin_id'):
+ return self.url_result('5min:%s' % data['fivemin_id'])
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_title,
+ 'description': description,
+ 'formats': formats,
+ 'duration': duration,
+ 'upload_date': upload_date,
+ 'thumbnails': thumbnails,
+ }
diff --git a/hypervideo_dl/extractor/hungama.py b/hypervideo_dl/extractor/hungama.py
new file mode 100644
index 0000000..3fdaac5
--- /dev/null
+++ b/hypervideo_dl/extractor/hungama.py
@@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ urlencode_postdata,
+)
+
+
+class HungamaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?hungama\.com/
+ (?:
+ (?:video|movie)/[^/]+/|
+ tv-show/(?:[^/]+/){2}\d+/episode/[^/]+/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'http://www.hungama.com/video/krishna-chants/39349649/',
+ 'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
+ 'info_dict': {
+ 'id': '2931166',
+ 'ext': 'mp4',
+ 'title': 'Lucky Ali - Kitni Haseen Zindagi',
+ 'track': 'Kitni Haseen Zindagi',
+ 'artist': 'Lucky Ali',
+ 'album': 'Aks',
+ 'release_year': 2000,
+ }
+ }, {
+ 'url': 'https://www.hungama.com/movie/kahaani-2/44129919/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hungama.com/tv-show/padded-ki-pushup/season-1/44139461/episode/ep-02-training-sasu-pathlaag-karing/44139503/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ info = self._search_json_ld(webpage, video_id)
+
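+        # The HLS manifest URL comes from an AJAX endpoint; the request below
+        # is roughly:
+        #   POST https://www.hungama.com/index.php?c=common&m=get_video_mdn_url
+        #   with form body 'content_id=<video_id>'
+        # and the JSON response carries the manifest URL in 'stream_url'.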
+ m3u8_url = self._download_json(
+ 'https://www.hungama.com/index.php', video_id,
+ data=urlencode_postdata({'content_id': video_id}), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'X-Requested-With': 'XMLHttpRequest',
+ }, query={
+ 'c': 'common',
+ 'm': 'get_video_mdn_url',
+ })['stream_url']
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ self._sort_formats(formats)
+
+ info.update({
+ 'id': video_id,
+ 'formats': formats,
+ })
+ return info
+
+
+class HungamaSongIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/',
+ 'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
+ 'info_dict': {
+ 'id': '2931166',
+ 'ext': 'mp4',
+ 'title': 'Lucky Ali - Kitni Haseen Zindagi',
+ 'track': 'Kitni Haseen Zindagi',
+ 'artist': 'Lucky Ali',
+ 'album': 'Aks',
+ 'release_year': 2000,
+ }
+ }
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ data = self._download_json(
+ 'https://www.hungama.com/audio-player-data/track/%s' % audio_id,
+ audio_id, query={'_country': 'IN'})[0]
+
+ track = data['song_name']
+ artist = data.get('singer_name')
+
+ m3u8_url = self._download_json(
+ data.get('file') or data['preview_link'],
+ audio_id)['response']['media_url']
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, audio_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ self._sort_formats(formats)
+
+ title = '%s - %s' % (artist, track) if artist else track
+ thumbnail = data.get('img_src') or data.get('album_image')
+
+ return {
+ 'id': audio_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'track': track,
+ 'artist': artist,
+ 'album': data.get('album_name'),
+ 'release_year': int_or_none(data.get('date')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/hypem.py b/hypervideo_dl/extractor/hypem.py
new file mode 100644
index 0000000..9ca28d6
--- /dev/null
+++ b/hypervideo_dl/extractor/hypem.py
@@ -0,0 +1,49 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class HypemIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hypem\.com/track/(?P<id>[0-9a-z]{5})'
+ _TEST = {
+ 'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
+ 'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
+ 'info_dict': {
+ 'id': '1v6ga',
+ 'ext': 'mp3',
+ 'title': 'Tame',
+ 'uploader': 'BODYWORK',
+ 'timestamp': 1371810457,
+ 'upload_date': '20130621',
+ }
+ }
+
+ def _real_extract(self, url):
+ track_id = self._match_id(url)
+
+ response = self._download_webpage(url, track_id)
+
+ track = self._parse_json(self._html_search_regex(
+ r'(?s)<script\s+type="application/json"\s+id="displayList-data">(.+?)</script>',
+ response, 'tracks'), track_id)['tracks'][0]
+
+ track_id = track['id']
+ title = track['song']
+
+ final_url = self._download_json(
+ 'http://hypem.com/serve/source/%s/%s' % (track_id, track['key']),
+ track_id, 'Downloading metadata', headers={
+ 'Content-Type': 'application/json'
+ })['url']
+
+ return {
+ 'id': track_id,
+ 'url': final_url,
+ 'ext': 'mp3',
+ 'title': title,
+ 'uploader': track.get('artist'),
+ 'duration': int_or_none(track.get('time')),
+ 'timestamp': int_or_none(track.get('ts')),
+ 'track': title,
+ }
diff --git a/hypervideo_dl/extractor/ign.py b/hypervideo_dl/extractor/ign.py
new file mode 100644
index 0000000..0d9f50e
--- /dev/null
+++ b/hypervideo_dl/extractor/ign.py
@@ -0,0 +1,257 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ HEADRequest,
+ determine_ext,
+ int_or_none,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+)
+
+
+class IGNBaseIE(InfoExtractor):
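+    # With _PAGE_TYPE = 'video' the URL below expands to
+    #   http://apis.ign.com/video/v3/videos/slug/<slug>
+    # and with _PAGE_TYPE = 'article' (IGNArticleIE) to
+    #   http://apis.ign.com/article/v3/articles/slug/<slug>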
+ def _call_api(self, slug):
+ return self._download_json(
+ 'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
+
+
+class IGNIE(IGNBaseIE):
+ """
+    Extractor for some of the IGN sites, like www.ign.com, es.ign.com and de.ign.com.
+    Some videos of it.ign.com are also supported.
+ """
+
+ _VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
+ IE_NAME = 'ign.com'
+ _PAGE_TYPE = 'video'
+
+ _TESTS = [{
+ 'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
+ 'md5': 'd2e1586d9987d40fad7867bf96a018ea',
+ 'info_dict': {
+ 'id': '8f862beef863986b2785559b9e1aa599',
+ 'ext': 'mp4',
+ 'title': 'The Last of Us Review',
+ 'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
+ 'timestamp': 1370440800,
+ 'upload_date': '20130605',
+ 'tags': 'count:9',
+ }
+ }, {
+ 'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
+ 'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
+ 'info_dict': {
+ 'id': 'ee10d774b508c9b8ec07e763b9125b91',
+ 'ext': 'mp4',
+ 'title': 'What\'s New Now: Is GoGo Snooping on Your Data?',
+ 'description': 'md5:817a20299de610bd56f13175386da6fa',
+ 'timestamp': 1420571160,
+ 'upload_date': '20150106',
+ 'tags': 'count:4',
+ }
+ }, {
+ 'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ video = self._call_api(display_id)
+ video_id = video['videoId']
+ metadata = video['metadata']
+ title = metadata.get('longTitle') or metadata.get('title') or metadata['name']
+
+ formats = []
+ refs = video.get('refs') or {}
+
+ m3u8_url = refs.get('m3uUrl')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ f4m_url = refs.get('f4mUrl')
+ if f4m_url:
+ formats.extend(self._extract_f4m_formats(
+ f4m_url, video_id, f4m_id='hds', fatal=False))
+
+ for asset in (video.get('assets') or []):
+ asset_url = asset.get('url')
+ if not asset_url:
+ continue
+ formats.append({
+ 'url': asset_url,
+ 'tbr': int_or_none(asset.get('bitrate'), 1000),
+ 'fps': int_or_none(asset.get('frame_rate')),
+ 'height': int_or_none(asset.get('height')),
+ 'width': int_or_none(asset.get('width')),
+ })
+
+ mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
+ if mezzanine_url:
+ formats.append({
+ 'ext': determine_ext(mezzanine_url, 'mp4'),
+ 'format_id': 'mezzanine',
+ 'preference': 1,
+ 'url': mezzanine_url,
+ })
+
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for thumbnail in (video.get('thumbnails') or []):
+ thumbnail_url = thumbnail.get('url')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ })
+
+ tags = []
+ for tag in (video.get('tags') or []):
+ display_name = tag.get('displayName')
+ if not display_name:
+ continue
+ tags.append(display_name)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(metadata.get('description')),
+ 'timestamp': parse_iso8601(metadata.get('publishDate')),
+ 'duration': int_or_none(metadata.get('duration')),
+ 'display_id': display_id,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'tags': tags,
+ }
+
+
+class IGNVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
+ _TESTS = [{
+ 'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
+ 'md5': 'dd9aca7ed2657c4e118d8b261e5e9de1',
+ 'info_dict': {
+ 'id': 'e9be7ea899a9bbfc0674accc22a36cc8',
+ 'ext': 'mp4',
+ 'title': 'How Hitman Aims to Be Different Than Every Other Stealth Game - NYCC 2015',
+ 'description': 'Taking out assassination targets in Hitman has never been more stylish.',
+ 'timestamp': 1444665600,
+ 'upload_date': '20151012',
+ }
+ }, {
+ 'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
+ 'only_matching': True,
+ }, {
+ # Youtube embed
+ 'url': 'https://me.ign.com/ar/ratchet-clank-rift-apart/144327/trailer/embed',
+ 'only_matching': True,
+ }, {
+ # Twitter embed
+ 'url': 'http://adria.ign.com/sherlock-season-4/9687/trailer/embed',
+ 'only_matching': True,
+ }, {
+ # Vimeo embed
+ 'url': 'https://kr.ign.com/bic-2018/3307/trailer/embed',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ req = HEADRequest(url.rsplit('/', 1)[0] + '/embed')
+ url = self._request_webpage(req, video_id).geturl()
+ ign_url = compat_parse_qs(
+ compat_urllib_parse_urlparse(url).query).get('url', [None])[0]
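+        # The /embed page redirects to a player URL; if that URL carries a
+        # ?url=... query parameter it points back at an ign.com video page,
+        # which IGNIE can handle directly.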
+ if ign_url:
+ return self.url_result(ign_url, IGNIE.ie_key())
+ return self.url_result(url)
+
+
+class IGNArticleIE(IGNBaseIE):
+ _VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
+ _PAGE_TYPE = 'article'
+ _TESTS = [{
+ 'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
+ 'info_dict': {
+ 'id': '524497489e4e8ff5848ece34',
+ 'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
+ },
+ 'playlist': [
+ {
+ 'info_dict': {
+ 'id': '5ebbd138523268b93c9141af17bec937',
+ 'ext': 'mp4',
+ 'title': 'GTA 5 Video Review',
+ 'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
+ 'timestamp': 1379339880,
+ 'upload_date': '20130916',
+ },
+ },
+ {
+ 'info_dict': {
+ 'id': '638672ee848ae4ff108df2a296418ee2',
+ 'ext': 'mp4',
+ 'title': '26 Twisted Moments from GTA 5 in Slow Motion',
+ 'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
+ 'timestamp': 1386878820,
+ 'upload_date': '20131212',
+ },
+ },
+ ],
+ 'params': {
+ 'playlist_items': '2-3',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
+ 'info_dict': {
+ 'id': '53ee806780a81ec46e0790f8',
+ 'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
+ },
+ 'playlist_count': 2,
+ }, {
+ # videoId pattern
+ 'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
+ 'only_matching': True,
+ }, {
+ # Youtube embed
+ 'url': 'https://www.ign.com/articles/2021-mvp-named-in-puppy-bowl-xvii',
+ 'only_matching': True,
+ }, {
+ # IMDB embed
+ 'url': 'https://www.ign.com/articles/2014/08/07/sons-of-anarchy-final-season-trailer',
+ 'only_matching': True,
+ }, {
+ # Facebook embed
+ 'url': 'https://www.ign.com/articles/2017/09/20/marvels-the-punisher-watch-the-new-trailer-for-the-netflix-series',
+ 'only_matching': True,
+ }, {
+ # Brightcove embed
+ 'url': 'https://www.ign.com/articles/2016/01/16/supergirl-goes-flying-with-martian-manhunter-in-new-clip',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ article = self._call_api(display_id)
+
+ def entries():
+ media_url = try_get(article, lambda x: x['mediaRelations'][0]['media']['metadata']['url'])
+ if media_url:
+ yield self.url_result(media_url, IGNIE.ie_key())
+ for content in (article.get('content') or []):
+ for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
+ yield self.url_result(video_url)
+
+ return self.playlist_result(
+ entries(), article.get('articleId'),
+ strip_or_none(try_get(article, lambda x: x['metadata']['headline'])))
diff --git a/hypervideo_dl/extractor/iheart.py b/hypervideo_dl/extractor/iheart.py
new file mode 100644
index 0000000..b54c05e
--- /dev/null
+++ b/hypervideo_dl/extractor/iheart.py
@@ -0,0 +1,97 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ clean_podcast_url,
+ int_or_none,
+ str_or_none,
+)
+
+
+class IHeartRadioBaseIE(InfoExtractor):
+ def _call_api(self, path, video_id, fatal=True, query=None):
+ return self._download_json(
+ 'https://api.iheart.com/api/v3/podcast/' + path,
+ video_id, fatal=fatal, query=query)
+
+ def _extract_episode(self, episode):
+ return {
+ 'thumbnail': episode.get('imageUrl'),
+ 'description': clean_html(episode.get('description')),
+ 'timestamp': int_or_none(episode.get('startDate'), 1000),
+ 'duration': int_or_none(episode.get('duration')),
+ }
+
+
+class IHeartRadioIE(IHeartRadioBaseIE):
+    IE_NAME = 'iheartradio'
+ _VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',
+ 'md5': 'c8609c92c8688dcb69d8541042b8abca',
+ 'info_dict': {
+ 'id': '70346499',
+ 'ext': 'mp3',
+ 'title': 'Part One: Alexander Lukashenko: The Dictator of Belarus',
+ 'description': 'md5:96cc7297b3a5a9ebae28643801c96fae',
+ 'timestamp': 1597741200,
+ 'upload_date': '20200818',
+ }
+ }
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ episode = self._call_api(
+ 'episodes/' + episode_id, episode_id)['episode']
+ info = self._extract_episode(episode)
+ info.update({
+ 'id': episode_id,
+ 'title': episode['title'],
+ 'url': clean_podcast_url(episode['mediaUrl']),
+ })
+ return info
+
+
+class IHeartRadioPodcastIE(IHeartRadioBaseIE):
+ IE_NAME = 'iheartradio:podcast'
+ _VALID_URL = r'https?://(?:www\.)?iheart(?:podcastnetwork)?\.com/podcast/[^/?&#]+-(?P<id>\d+)/?(?:[?#&]|$)'
+ _TESTS = [{
+ 'url': 'https://www.iheart.com/podcast/1119-it-could-happen-here-30717896/',
+ 'info_dict': {
+ 'id': '30717896',
+ 'title': 'It Could Happen Here',
+ 'description': 'md5:5842117412a967eb0b01f8088eb663e2',
+ },
+ 'playlist_mincount': 11,
+ }, {
+ 'url': 'https://www.iheartpodcastnetwork.com/podcast/105-stuff-you-should-know-26940277',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ podcast_id = self._match_id(url)
+ path = 'podcasts/' + podcast_id
+ episodes = self._call_api(
+ path + '/episodes', podcast_id, query={'limit': 1000000000})['data']
+
+ entries = []
+ for episode in episodes:
+ episode_id = str_or_none(episode.get('id'))
+ if not episode_id:
+ continue
+ info = self._extract_episode(episode)
+ info.update({
+ '_type': 'url',
+ 'id': episode_id,
+ 'title': episode.get('title'),
+ 'url': 'iheartradio:' + episode_id,
+ 'ie_key': IHeartRadioIE.ie_key(),
+ })
+ entries.append(info)
+
+ podcast = self._call_api(path, podcast_id, False) or {}
+
+ return self.playlist_result(
+ entries, podcast_id, podcast.get('title'), podcast.get('description'))
diff --git a/hypervideo_dl/extractor/imdb.py b/hypervideo_dl/extractor/imdb.py
new file mode 100644
index 0000000..a313019
--- /dev/null
+++ b/hypervideo_dl/extractor/imdb.py
@@ -0,0 +1,147 @@
+from __future__ import unicode_literals
+
+import base64
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ mimetype2ext,
+ parse_duration,
+ qualities,
+ try_get,
+ url_or_none,
+)
+
+
+class ImdbIE(InfoExtractor):
+ IE_NAME = 'imdb'
+ IE_DESC = 'Internet Movie Database trailers'
+ _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).*?[/-]vi(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.imdb.com/video/imdb/vi2524815897',
+ 'info_dict': {
+ 'id': '2524815897',
+ 'ext': 'mp4',
+ 'title': 'No. 2',
+ 'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
+ 'duration': 152,
+ }
+ }, {
+ 'url': 'http://www.imdb.com/video/_/vi2524815897',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.imdb.com/title/tt1667889/?ref_=ext_shr_eml_vi#lb-vi2524815897',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.imdb.com/videoplayer/vi1562949145',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.imdb.com/title/tt4218696/videoplayer/vi2608641561',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.imdb.com/list/ls009921623/videoplayer/vi260482329',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data = self._download_json(
+ 'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id,
+ query={
+ 'key': base64.b64encode(json.dumps({
+ 'type': 'VIDEO_PLAYER',
+ 'subType': 'FORCE_LEGACY',
+ 'id': 'vi%s' % video_id,
+ }).encode()).decode(),
+ })[0]
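+        # i.e. the 'key' query parameter is the base64 of a JSON blob such as
+        # {"type": "VIDEO_PLAYER", "subType": "FORCE_LEGACY", "id": "vi2524815897"}
+        # and the endpoint returns a list whose first element holds the legacy
+        # encodings used below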
+
+ quality = qualities(('SD', '480p', '720p', '1080p'))
+ formats = []
+ for encoding in data['videoLegacyEncodings']:
+ if not encoding or not isinstance(encoding, dict):
+ continue
+ video_url = url_or_none(encoding.get('url'))
+ if not video_url:
+ continue
+ ext = mimetype2ext(encoding.get(
+ 'mimeType')) or determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ preference=1, m3u8_id='hls', fatal=False))
+ continue
+ format_id = encoding.get('definition')
+ formats.append({
+ 'format_id': format_id,
+ 'url': video_url,
+ 'ext': ext,
+ 'quality': quality(format_id),
+ })
+ self._sort_formats(formats)
+
+ webpage = self._download_webpage(
+ 'https://www.imdb.com/video/vi' + video_id, video_id)
+ video_metadata = self._parse_json(self._search_regex(
+ r'args\.push\(\s*({.+?})\s*\)\s*;', webpage,
+ 'video metadata'), video_id)
+
+ video_info = video_metadata.get('VIDEO_INFO')
+ if video_info and isinstance(video_info, dict):
+ info = try_get(
+ video_info, lambda x: x[list(video_info.keys())[0]][0], dict)
+ else:
+ info = {}
+
+ title = self._html_search_meta(
+ ['og:title', 'twitter:title'], webpage) or self._html_search_regex(
+ r'<title>(.+?)</title>', webpage, 'title',
+ default=None) or info['videoTitle']
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'alt_title': info.get('videoSubTitle'),
+ 'formats': formats,
+ 'description': info.get('videoDescription'),
+ 'thumbnail': url_or_none(try_get(
+ video_metadata, lambda x: x['videoSlate']['source'])),
+ 'duration': parse_duration(info.get('videoRuntime')),
+ }
+
+
+class ImdbListIE(InfoExtractor):
+ IE_NAME = 'imdb:list'
+ IE_DESC = 'Internet Movie Database lists'
+ _VALID_URL = r'https?://(?:www\.)?imdb\.com/list/ls(?P<id>\d{9})(?!/videoplayer/vi\d+)'
+ _TEST = {
+ 'url': 'https://www.imdb.com/list/ls009921623/',
+ 'info_dict': {
+ 'id': '009921623',
+ 'title': 'The Bourne Legacy',
+ 'description': 'A list of trailers, clips, and more from The Bourne Legacy, starring Jeremy Renner and Rachel Weisz.',
+ },
+ 'playlist_count': 8,
+ }
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+ webpage = self._download_webpage(url, list_id)
+ entries = [
+ self.url_result('http://www.imdb.com' + m, 'Imdb')
+ for m in re.findall(r'href="(/list/ls%s/videoplayer/vi[^"]+)"' % list_id, webpage)]
+
+ list_title = self._html_search_regex(
+ r'<h1[^>]+class="[^"]*header[^"]*"[^>]*>(.*?)</h1>',
+ webpage, 'list title')
+ list_description = self._html_search_regex(
+ r'<div[^>]+class="[^"]*list-description[^"]*"[^>]*><p>(.*?)</p>',
+ webpage, 'list description')
+
+ return self.playlist_result(entries, list_id, list_title, list_description)
diff --git a/hypervideo_dl/extractor/imggaming.py b/hypervideo_dl/extractor/imggaming.py
new file mode 100644
index 0000000..e11f920
--- /dev/null
+++ b/hypervideo_dl/extractor/imggaming.py
@@ -0,0 +1,133 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+
+class ImgGamingBaseIE(InfoExtractor):
+ _API_BASE = 'https://dce-frontoffice.imggaming.com/api/v2/'
+ _API_KEY = '857a1e5d-e35e-4fdf-805b-a87b6f8364bf'
+ _HEADERS = None
+ _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'}
+ _REALM = None
+ _VALID_URL_TEMPL = r'https?://(?P<domain>%s)/(?P<type>live|playlist|video)/(?P<id>\d+)(?:\?.*?\bplaylistId=(?P<playlist_id>\d+))?'
+
+ def _real_initialize(self):
+ self._HEADERS = {
+ 'Realm': 'dce.' + self._REALM,
+ 'x-api-key': self._API_KEY,
+ }
+
+ email, password = self._get_login_info()
+ if email is None:
+ self.raise_login_required()
+
+ p_headers = self._HEADERS.copy()
+ p_headers['Content-Type'] = 'application/json'
+ self._HEADERS['Authorization'] = 'Bearer ' + self._download_json(
+ self._API_BASE + 'login',
+ None, 'Logging in', data=json.dumps({
+ 'id': email,
+ 'secret': password,
+ }).encode(), headers=p_headers)['authorisationToken']
+
+ def _call_api(self, path, media_id):
+ return self._download_json(
+ self._API_BASE + path + media_id, media_id, headers=self._HEADERS)
+
+ def _extract_dve_api_url(self, media_id, media_type):
+ stream_path = 'stream'
+ if media_type == 'video':
+ stream_path += '/vod/'
+ else:
+ stream_path += '?eventId='
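+        # _call_api appends media_id, so the request path becomes either
+        # 'stream/vod/<media_id>' (VOD) or 'stream?eventId=<media_id>' (live)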
+ try:
+ return self._call_api(
+ stream_path, media_id)['playerUrlCallback']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ raise ExtractorError(
+ self._parse_json(e.cause.read().decode(), media_id)['messages'][0],
+ expected=True)
+ raise
+
+ def _real_extract(self, url):
+ domain, media_type, media_id, playlist_id = re.match(self._VALID_URL, url).groups()
+
+ if playlist_id:
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % media_id)
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
+ media_type, media_id = 'playlist', playlist_id
+
+ if media_type == 'playlist':
+ playlist = self._call_api('vod/playlist/', media_id)
+ entries = []
+ for video in try_get(playlist, lambda x: x['videos']['vods']) or []:
+ video_id = str_or_none(video.get('id'))
+ if not video_id:
+ continue
+ entries.append(self.url_result(
+ 'https://%s/video/%s' % (domain, video_id),
+ self.ie_key(), video_id))
+ return self.playlist_result(
+ entries, media_id, playlist.get('title'),
+ playlist.get('description'))
+
+ dve_api_url = self._extract_dve_api_url(media_id, media_type)
+ video_data = self._download_json(dve_api_url, media_id)
+ is_live = media_type == 'live'
+ if is_live:
+ title = self._live_title(self._call_api('event/', media_id)['title'])
+ else:
+ title = video_data['name']
+
+ formats = []
+ for proto in ('hls', 'dash'):
+ media_url = video_data.get(proto + 'Url') or try_get(video_data, lambda x: x[proto]['url'])
+ if not media_url:
+ continue
+ if proto == 'hls':
+ m3u8_formats = self._extract_m3u8_formats(
+ media_url, media_id, 'mp4', 'm3u8' if is_live else 'm3u8_native',
+ m3u8_id='hls', fatal=False, headers=self._MANIFEST_HEADERS)
+ for f in m3u8_formats:
+ f.setdefault('http_headers', {}).update(self._MANIFEST_HEADERS)
+ formats.append(f)
+ else:
+ formats.extend(self._extract_mpd_formats(
+ media_url, media_id, mpd_id='dash', fatal=False,
+ headers=self._MANIFEST_HEADERS))
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for subtitle in video_data.get('subtitles', []):
+ subtitle_url = subtitle.get('url')
+ if not subtitle_url:
+ continue
+ subtitles.setdefault(subtitle.get('lang', 'en_US'), []).append({
+ 'url': subtitle_url,
+ })
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': video_data.get('thumbnailUrl'),
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'tags': video_data.get('tags'),
+ 'is_live': is_live,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/imgur.py b/hypervideo_dl/extractor/imgur.py
new file mode 100644
index 0000000..511fa5f
--- /dev/null
+++ b/hypervideo_dl/extractor/imgur.py
@@ -0,0 +1,154 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ ExtractorError,
+)
+
+
+class ImgurIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://i.imgur.com/A61SaA1.gifv',
+ 'info_dict': {
+ 'id': 'A61SaA1',
+ 'ext': 'mp4',
+ 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+ },
+ }, {
+ 'url': 'https://imgur.com/A61SaA1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://i.imgur.com/crGpqCV.mp4',
+ 'only_matching': True,
+ }, {
+ # no title
+ 'url': 'https://i.imgur.com/jxBXAMC.gifv',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
+
+ width = int_or_none(self._og_search_property(
+ 'video:width', webpage, default=None))
+ height = int_or_none(self._og_search_property(
+ 'video:height', webpage, default=None))
+
+ video_elements = self._search_regex(
+ r'(?s)<div class="video-elements">(.*?)</div>',
+ webpage, 'video elements', default=None)
+ if not video_elements:
+ raise ExtractorError(
+ 'No sources found for video %s. Maybe an image?' % video_id,
+ expected=True)
+
+ formats = []
+ for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
+ formats.append({
+ 'format_id': m.group('type').partition('/')[2],
+ 'url': self._proto_relative_url(m.group('src')),
+ 'ext': mimetype2ext(m.group('type')),
+ 'width': width,
+ 'height': height,
+ 'http_headers': {
+ 'User-Agent': 'hypervideo (like wget)',
+ },
+ })
+
+ gif_json = self._search_regex(
+ r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
+ webpage, 'GIF code', fatal=False)
+ if gif_json:
+ gifd = self._parse_json(
+ gif_json, video_id, transform_source=js_to_json)
+ formats.append({
+ 'format_id': 'gif',
+ 'preference': -10,
+ 'width': width,
+ 'height': height,
+ 'ext': 'gif',
+ 'acodec': 'none',
+ 'vcodec': 'gif',
+ 'container': 'gif',
+ 'url': self._proto_relative_url(gifd['gifUrl']),
+ 'filesize': gifd.get('size'),
+ 'http_headers': {
+ 'User-Agent': 'hypervideo (like wget)',
+ },
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': self._og_search_title(webpage, default=video_id),
+ }
+
+
+class ImgurGalleryIE(InfoExtractor):
+ IE_NAME = 'imgur:gallery'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://imgur.com/gallery/Q95ko',
+ 'info_dict': {
+ 'id': 'Q95ko',
+ 'title': 'Adding faces make every GIF better',
+ },
+ 'playlist_count': 25,
+ }, {
+ 'url': 'http://imgur.com/topic/Aww/ll5Vk',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://imgur.com/gallery/YcAQlkx',
+ 'info_dict': {
+ 'id': 'YcAQlkx',
+ 'ext': 'mp4',
+ 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
+ }
+ }, {
+ 'url': 'http://imgur.com/topic/Funny/N8rOudd',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://imgur.com/r/aww/VQcQPhM',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ gallery_id = self._match_id(url)
+
+ data = self._download_json(
+ 'https://imgur.com/gallery/%s.json' % gallery_id,
+ gallery_id)['data']['image']
+
+ if data.get('is_album'):
+ entries = [
+ self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
+ for image in data['album_images']['images'] if image.get('hash')]
+ return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
+
+ return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
+
+
+class ImgurAlbumIE(ImgurGalleryIE):
+ IE_NAME = 'imgur:album'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://imgur.com/a/j6Orj',
+ 'info_dict': {
+ 'id': 'j6Orj',
+ 'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
+ },
+ 'playlist_count': 12,
+ }]
diff --git a/hypervideo_dl/extractor/ina.py b/hypervideo_dl/extractor/ina.py
new file mode 100644
index 0000000..b3b2683
--- /dev/null
+++ b/hypervideo_dl/extractor/ina.py
@@ -0,0 +1,86 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ strip_or_none,
+ xpath_attr,
+ xpath_text,
+)
+
+
+class InaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
+ _TESTS = [{
+ 'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
+ 'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
+ 'info_dict': {
+ 'id': 'I12055569',
+ 'ext': 'mp4',
+ 'title': 'François Hollande "Je crois que c\'est clair"',
+ 'description': 'md5:3f09eb072a06cb286b8f7e4f77109663',
+ }
+ }, {
+ 'url': 'https://www.ina.fr/video/S806544_001/don-d-organes-des-avancees-mais-d-importants-besoins-video.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ina.fr/audio/P16173408',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ina.fr/video/P16173408-video.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.ina.fr/video/I12055569',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info_doc = self._download_xml(
+ 'http://player.ina.fr/notices/%s.mrss' % video_id, video_id)
+ item = info_doc.find('channel/item')
+ title = xpath_text(item, 'title', fatal=True)
+ media_ns_xpath = lambda x: self._xpath_ns(x, 'http://search.yahoo.com/mrss/')
+ content = item.find(media_ns_xpath('content'))
+
+ get_furl = lambda x: xpath_attr(content, media_ns_xpath(x), 'url')
+ formats = []
+ for q, w, h in (('bq', 400, 300), ('mq', 512, 384), ('hq', 768, 576)):
+ q_url = get_furl(q)
+ if not q_url:
+ continue
+ formats.append({
+ 'format_id': q,
+ 'url': q_url,
+ 'width': w,
+ 'height': h,
+ })
+ if not formats:
+ furl = get_furl('player') or content.attrib['url']
+ ext = determine_ext(furl)
+ formats = [{
+ 'url': furl,
+ 'vcodec': 'none' if ext == 'mp3' else None,
+ 'ext': ext,
+ }]
+
+ thumbnails = []
+ for thumbnail in content.findall(media_ns_xpath('thumbnail')):
+ thumbnail_url = thumbnail.get('url')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'height': int_or_none(thumbnail.get('height')),
+ 'width': int_or_none(thumbnail.get('width')),
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': strip_or_none(xpath_text(item, 'description')),
+ 'thumbnails': thumbnails,
+ }
diff --git a/hypervideo_dl/extractor/inc.py b/hypervideo_dl/extractor/inc.py
new file mode 100644
index 0000000..d5b258a
--- /dev/null
+++ b/hypervideo_dl/extractor/inc.py
@@ -0,0 +1,59 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+
+
+class IncIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?inc\.com/(?:[^/]+/)+(?P<id>[^.]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.inc.com/tip-sheet/bill-gates-says-these-5-books-will-make-you-smarter.html',
+ 'md5': '7416739c9c16438c09fa35619d6ba5cb',
+ 'info_dict': {
+ 'id': '1_wqig47aq',
+ 'ext': 'mov',
+ 'title': 'Bill Gates Says These 5 Books Will Make You Smarter',
+ 'description': 'md5:bea7ff6cce100886fc1995acb743237e',
+ 'timestamp': 1474414430,
+ 'upload_date': '20160920',
+ 'uploader_id': 'video@inc.com',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # div with id=kaltura_player_1_kqs38cgm
+ 'url': 'https://www.inc.com/oscar-raymundo/richard-branson-young-entrepeneurs.html',
+ 'info_dict': {
+ 'id': '1_kqs38cgm',
+ 'ext': 'mp4',
+ 'title': 'Branson: "In the end, you have to say, Screw it. Just do it."',
+ 'description': 'md5:21b832d034f9af5191ca5959da5e9cb6',
+ 'timestamp': 1364403232,
+ 'upload_date': '20130327',
+ 'uploader_id': 'incdigital@inc.com',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ partner_id = self._search_regex(
+ r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage,
+ 'partner id', default='1034971')
+
+ kaltura_id = self._search_regex(
+ r'id=(["\'])kaltura_player_(?P<id>.+?)\1', webpage, 'kaltura id',
+ default=None, group='id') or self._parse_json(self._search_regex(
+ r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
+ display_id)['vid_kaltura_id']
+
+ return self.url_result(
+ 'kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key())
diff --git a/hypervideo_dl/extractor/indavideo.py b/hypervideo_dl/extractor/indavideo.py
new file mode 100644
index 0000000..4c16243
--- /dev/null
+++ b/hypervideo_dl/extractor/indavideo.py
@@ -0,0 +1,128 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ parse_age_limit,
+ parse_iso8601,
+ update_url_query,
+)
+
+
+class IndavideoEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
+ _TESTS = [{
+ 'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
+ 'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
+ 'info_dict': {
+ 'id': '1837039',
+ 'ext': 'mp4',
+ 'title': 'Cicatánc',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'cukiajanlo',
+ 'uploader_id': '83729',
+ 'timestamp': 1439193826,
+ 'upload_date': '20150810',
+ 'duration': 72,
+ 'age_limit': 0,
+ 'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
+ },
+ }, {
+ 'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
+ 'only_matching': True,
+ }]
+
+ # Some example URLs covered by generic extractor:
+ # http://indavideo.hu/video/Vicces_cica_1
+ # http://index.indavideo.hu/video/2015_0728_beregszasz
+ # http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
+ # http://erotika.indavideo.hu/video/Amator_tini_punci
+ # http://film.indavideo.hu/video/f_hrom_nagymamm_volt
+ # http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
+ video_id)['data']
+
+ title = video['title']
+
+ video_urls = []
+
+ video_files = video.get('video_files')
+ if isinstance(video_files, list):
+ video_urls.extend(video_files)
+ elif isinstance(video_files, dict):
+ video_urls.extend(video_files.values())
+
+ video_file = video.get('video_file')
+        if video_file:
+ video_urls.append(video_file)
+ video_urls = list(set(video_urls))
+
+ video_prefix = video_urls[0].rsplit('/', 1)[0]
+
+ for flv_file in video.get('flv_files', []):
+ flv_url = '%s/%s' % (video_prefix, flv_file)
+ if flv_url not in video_urls:
+ video_urls.append(flv_url)
+
+ filesh = video.get('filesh')
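+        # 'filesh' appears to map video height to a per-quality access token,
+        # e.g. (hypothetical) {'360': 'abc123', '720': 'def456'}; if it is
+        # present, URLs without a matching token are skipped below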
+
+ formats = []
+ for video_url in video_urls:
+ height = int_or_none(self._search_regex(
+ r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
+ if filesh:
+ if not height:
+ continue
+ token = filesh.get(compat_str(height))
+ if token is None:
+ continue
+ video_url = update_url_query(video_url, {'token': token})
+ formats.append({
+ 'url': video_url,
+ 'height': height,
+ })
+ self._sort_formats(formats)
+
+ timestamp = video.get('date')
+ if timestamp:
+ # upload date is in CEST
+ timestamp = parse_iso8601(timestamp + ' +0200', ' ')
+
+ thumbnails = [{
+ 'url': self._proto_relative_url(thumbnail)
+ } for thumbnail in video.get('thumbnails', [])]
+
+ tags = [tag['title'] for tag in video.get('tags') or []]
+
+ return {
+ 'id': video.get('id') or video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnails': thumbnails,
+ 'uploader': video.get('user_name'),
+ 'uploader_id': video.get('user_id'),
+ 'timestamp': timestamp,
+ 'duration': int_or_none(video.get('length')),
+ 'age_limit': parse_age_limit(video.get('age_limit')),
+ 'tags': tags,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/infoq.py b/hypervideo_dl/extractor/infoq.py
new file mode 100644
index 0000000..0a70a1f
--- /dev/null
+++ b/hypervideo_dl/extractor/infoq.py
@@ -0,0 +1,137 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote,
+ compat_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ update_url_query,
+)
+from .bokecc import BokeCCBaseIE
+
+
+class InfoQIE(BokeCCBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
+ 'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
+ 'info_dict': {
+ 'id': 'A-Few-of-My-Favorite-Python-Things',
+ 'ext': 'mp4',
+ 'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
+ 'title': 'A Few of My Favorite [Python] Things',
+ },
+ }, {
+ 'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.infoq.com/cn/presentations/openstack-continued-delivery',
+ 'md5': '4918d0cca1497f2244572caf626687ef',
+ 'info_dict': {
+ 'id': 'openstack-continued-delivery',
+ 'title': 'OpenStack持续交付之路',
+ 'ext': 'flv',
+ 'description': 'md5:308d981fb28fa42f49f9568322c683ff',
+ },
+ }, {
+ 'url': 'https://www.infoq.com/presentations/Simple-Made-Easy',
+ 'md5': '0e34642d4d9ef44bf86f66f6399672db',
+ 'info_dict': {
+ 'id': 'Simple-Made-Easy',
+ 'title': 'Simple Made Easy',
+ 'ext': 'mp3',
+ 'description': 'md5:3e0e213a8bbd074796ef89ea35ada25b',
+ },
+ 'params': {
+ 'format': 'bestaudio',
+ },
+ }]
+
+ def _extract_rtmp_video(self, webpage):
+ # The server URL is hardcoded
+ video_url = 'rtmpe://videof.infoq.com/cfx/st/'
+
+ # Extract video URL
+ encoded_id = self._search_regex(
+ r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None)
+
+ real_id = compat_urllib_parse_unquote(compat_b64decode(encoded_id).decode('utf-8'))
+ playpath = 'mp4:' + real_id
+
+ return [{
+ 'format_id': 'rtmp_video',
+ 'url': video_url,
+ 'ext': determine_ext(playpath),
+ 'play_path': playpath,
+ }]
+
+ def _extract_cf_auth(self, webpage):
+ policy = self._search_regex(r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
+ signature = self._search_regex(r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
+ key_pair_id = self._search_regex(r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
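+        # These are CloudFront signed-URL parameters; update_url_query() later
+        # appends them, yielding URLs like (hypothetical values):
+        #   https://.../video.mp4?Policy=eyJ...&Signature=abc...&Key-Pair-Id=APKA...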
+ return {
+ 'Policy': policy,
+ 'Signature': signature,
+ 'Key-Pair-Id': key_pair_id,
+ }
+
+ def _extract_http_video(self, webpage):
+ http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
+ http_video_url = update_url_query(http_video_url, self._extract_cf_auth(webpage))
+ return [{
+ 'format_id': 'http_video',
+ 'url': http_video_url,
+ 'http_headers': {'Referer': 'https://www.infoq.com/'},
+ }]
+
+ def _extract_http_audio(self, webpage, video_id):
+ fields = self._form_hidden_inputs('mp3Form', webpage)
+ http_audio_url = fields.get('filename')
+ if not http_audio_url:
+ return []
+
+ # base URL is found in the Location header in the response returned by
+ # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
+ http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
+ http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
+
+        # the audio file sometimes seems to be missing even when there is a
+        # download link, so probe the URL to make sure
+ if not self._is_valid_url(http_audio_url, video_id):
+ return []
+
+ return [{
+ 'format_id': 'http_audio',
+ 'url': http_audio_url,
+ 'vcodec': 'none',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
+ video_description = self._html_search_meta('description', webpage, 'description')
+
+ if '/cn/' in url:
+            # for videos on the Chinese site, the HTTP video URL exists but always fails with 403
+ formats = self._extract_bokecc_formats(webpage, video_id)
+ else:
+ formats = (
+ self._extract_rtmp_video(webpage)
+ + self._extract_http_video(webpage)
+ + self._extract_http_audio(webpage, video_id))
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_title,
+ 'description': video_description,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/instagram.py b/hypervideo_dl/extractor/instagram.py
new file mode 100644
index 0000000..12e1014
--- /dev/null
+++ b/hypervideo_dl/extractor/instagram.py
@@ -0,0 +1,474 @@
+from __future__ import unicode_literals
+
+import itertools
+import hashlib
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_HTTPError,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ get_element_by_attribute,
+ int_or_none,
+ lowercase_escape,
+ std_headers,
+ try_get,
+ url_or_none,
+)
+
+
+class InstagramIE(InfoExtractor):
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
+ _TESTS = [{
+ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
+ 'md5': '0d2da106a9d2631273e192b372806516',
+ 'info_dict': {
+ 'id': 'aye83DjauH',
+ 'ext': 'mp4',
+ 'title': 'Video by naomipq',
+ 'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 0,
+ 'timestamp': 1371748545,
+ 'upload_date': '20130620',
+ 'uploader_id': 'naomipq',
+ 'uploader': 'B E A U T Y F O R A S H E S',
+ 'like_count': int,
+ 'comment_count': int,
+ 'comments': list,
+ },
+ }, {
+ # missing description
+ 'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears',
+ 'info_dict': {
+ 'id': 'BA-pQFBG8HZ',
+ 'ext': 'mp4',
+ 'title': 'Video by britneyspears',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 0,
+ 'timestamp': 1453760977,
+ 'upload_date': '20160125',
+ 'uploader_id': 'britneyspears',
+ 'uploader': 'Britney Spears',
+ 'like_count': int,
+ 'comment_count': int,
+ 'comments': list,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # multi video post
+ 'url': 'https://www.instagram.com/p/BQ0eAlwhDrw/',
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'BQ0dSaohpPW',
+ 'ext': 'mp4',
+ 'title': 'Video 1',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'BQ0dTpOhuHT',
+ 'ext': 'mp4',
+ 'title': 'Video 2',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'BQ0dT7RBFeF',
+ 'ext': 'mp4',
+ 'title': 'Video 3',
+ },
+ }],
+ 'info_dict': {
+ 'id': 'BQ0eAlwhDrw',
+ 'title': 'Post by instagram',
+ 'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
+ },
+ }, {
+ # IGTV
+ 'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
+ 'info_dict': {
+ 'id': 'BkfuX9UB-eK',
+ 'ext': 'mp4',
+ 'title': 'Fingerboarding Tricks with @cass.fb',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 53.83,
+ 'timestamp': 1530032919,
+ 'upload_date': '20180626',
+ 'uploader_id': 'instagram',
+ 'uploader': 'Instagram',
+ 'like_count': int,
+ 'comment_count': int,
+ 'comments': list,
+ 'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
+ }
+ }, {
+ 'url': 'https://instagram.com/p/-Cmh1cukG2/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.instagram.com/tv/aye83DjauH/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.instagram.com/reel/CDUMkliABpa/',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_embed_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1',
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
+ blockquote_el = get_element_by_attribute(
+ 'class', 'instagram-media', webpage)
+ if blockquote_el is None:
+ return
+
+ mobj = re.search(
+ r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1', blockquote_el)
+ if mobj:
+ return mobj.group('link')
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ url = mobj.group('url')
+
+ webpage = self._download_webpage(url, video_id)
+
+        (media, video_url, description, title, thumbnail, duration, timestamp,
+         uploader, uploader_id, like_count, comment_count, comments, height,
+         width) = [None] * 14
+
+ shared_data = self._parse_json(
+ self._search_regex(
+ r'window\._sharedData\s*=\s*({.+?});',
+ webpage, 'shared data', default='{}'),
+ video_id, fatal=False)
+ if shared_data:
+ media = try_get(
+ shared_data,
+ (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
+ lambda x: x['entry_data']['PostPage'][0]['media']),
+ dict)
+ # _sharedData.entry_data.PostPage is empty when authenticated (see
+ # https://github.com/ytdl-org/youtube-dl/pull/22880)
+ if not media:
+ additional_data = self._parse_json(
+ self._search_regex(
+ r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
+ webpage, 'additional data', default='{}'),
+ video_id, fatal=False)
+ if additional_data:
+ media = try_get(
+ additional_data, lambda x: x['graphql']['shortcode_media'],
+ dict)
+ if media:
+ video_url = media.get('video_url')
+ height = int_or_none(media.get('dimensions', {}).get('height'))
+ width = int_or_none(media.get('dimensions', {}).get('width'))
+ description = try_get(
+ media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
+ compat_str) or media.get('caption')
+ title = media.get('title')
+ thumbnail = media.get('display_src') or media.get('display_url')
+ duration = float_or_none(media.get('video_duration'))
+ timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
+ uploader = media.get('owner', {}).get('full_name')
+ uploader_id = media.get('owner', {}).get('username')
+
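+ # counts may appear under the GraphQL 'edge_media_*' keys or under the
+ # legacy '<kind>s' keys, so probe both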
+ def get_count(keys, kind):
+ if not isinstance(keys, (list, tuple)):
+ keys = [keys]
+ for key in keys:
+ count = int_or_none(try_get(
+ media, (lambda x: x['edge_media_%s' % key]['count'],
+ lambda x: x['%ss' % kind]['count'])))
+ if count is not None:
+ return count
+ like_count = get_count('preview_like', 'like')
+ comment_count = get_count(
+ ('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')
+
+ comments = [{
+ 'author': comment.get('user', {}).get('username'),
+ 'author_id': comment.get('user', {}).get('id'),
+ 'id': comment.get('id'),
+ 'text': comment.get('text'),
+ 'timestamp': int_or_none(comment.get('created_at')),
+ } for comment in media.get(
+ 'comments', {}).get('nodes', []) if comment.get('text')]
+ if not video_url:
+ edges = try_get(
+ media, lambda x: x['edge_sidecar_to_children']['edges'],
+ list) or []
+ if edges:
+ entries = []
+ for edge_num, edge in enumerate(edges, start=1):
+ node = try_get(edge, lambda x: x['node'], dict)
+ if not node:
+ continue
+ node_video_url = url_or_none(node.get('video_url'))
+ if not node_video_url:
+ continue
+ entries.append({
+ 'id': node.get('shortcode') or node['id'],
+ 'title': node.get('title') or 'Video %d' % edge_num,
+ 'url': node_video_url,
+ 'thumbnail': node.get('display_url'),
+ 'duration': float_or_none(node.get('video_duration')),
+ 'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
+ 'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
+ 'view_count': int_or_none(node.get('video_view_count')),
+ })
+ return self.playlist_result(
+ entries, video_id,
+ 'Post by %s' % uploader_id if uploader_id else None,
+ description)
+
+ if not video_url:
+ video_url = self._og_search_video_url(webpage, secure=False)
+
+ formats = [{
+ 'url': video_url,
+ 'width': width,
+ 'height': height,
+ }]
+
+ if not uploader_id:
+ uploader_id = self._search_regex(
+ r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"',
+ webpage, 'uploader id', fatal=False)
+
+ if not description:
+ description = self._search_regex(
+ r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None)
+ if description is not None:
+ description = lowercase_escape(description)
+
+ if not thumbnail:
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'ext': 'mp4',
+ 'title': title or 'Video by %s' % uploader_id,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'uploader_id': uploader_id,
+ 'uploader': uploader,
+ 'like_count': like_count,
+ 'comment_count': comment_count,
+ 'comments': comments,
+ }
+
+
+class InstagramPlaylistIE(InfoExtractor):
+ # Base class for extractors that page through a GraphQL query and
+ # return the results as a playlist.
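+ # Subclasses provide _QUERY_HASH along with _parse_timeline_from() and
+ # _query_vars_for() to describe the concrete GraphQL query.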
+
+ _gis_tmpl = None  # caches the first GIS signature template that works
+
+ def _parse_graphql(self, webpage, item_id):
+ # Reads a webpage and returns its GraphQL data.
+ return self._parse_json(
+ self._search_regex(
+ r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
+ item_id)
+
+ def _extract_graphql(self, data, url):
+ # Parses GraphQL queries containing videos and generates a playlist.
+ def get_count(suffix):
+ return int_or_none(try_get(
+ node, lambda x: x['edge_media_' + suffix]['count']))
+
+ uploader_id = self._match_id(url)
+ csrf_token = data['config']['csrf_token']
+ rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'
+
+ cursor = ''
+ for page_num in itertools.count(1):
+ variables = {
+ 'first': 12,
+ 'after': cursor,
+ }
+ variables.update(self._query_vars_for(data))
+ variables = json.dumps(variables)
+
+ if self._gis_tmpl:
+ gis_tmpls = [self._gis_tmpl]
+ else:
+ gis_tmpls = [
+ '%s' % rhx_gis,
+ '',
+ '%s:%s' % (rhx_gis, csrf_token),
+ '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
+ ]
+
+ # try each way of generating the GIS signature, use the first one that
+ # works and cache it for subsequent requests
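+ # e.g. with the bare rhx_gis template the X-Instagram-GIS header is
+ # md5('<rhx_gis>:<variables JSON>').hexdigest()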
+ for gis_tmpl in gis_tmpls:
+ try:
+ json_data = self._download_json(
+ 'https://www.instagram.com/graphql/query/', uploader_id,
+ 'Downloading JSON page %d' % page_num, headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'X-Instagram-GIS': hashlib.md5(
+ ('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(),
+ }, query={
+ 'query_hash': self._QUERY_HASH,
+ 'variables': variables,
+ })
+ media = self._parse_timeline_from(json_data)
+ self._gis_tmpl = gis_tmpl
+ break
+ except ExtractorError as e:
+ # if it's an error caused by a bad query, and there are
+ # more GIS templates to try, ignore it and keep trying
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ if gis_tmpl != gis_tmpls[-1]:
+ continue
+ raise
+
+ edges = media.get('edges')
+ if not edges or not isinstance(edges, list):
+ break
+
+ for edge in edges:
+ node = edge.get('node')
+ if not node or not isinstance(node, dict):
+ continue
+ if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
+ continue
+ video_id = node.get('shortcode')
+ if not video_id:
+ continue
+
+ info = self.url_result(
+ 'https://instagram.com/p/%s/' % video_id,
+ ie=InstagramIE.ie_key(), video_id=video_id)
+
+ description = try_get(
+ node, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
+ compat_str)
+ thumbnail = node.get('thumbnail_src') or node.get('display_src')
+ timestamp = int_or_none(node.get('taken_at_timestamp'))
+
+ comment_count = get_count('to_comment')
+ like_count = get_count('preview_like')
+ view_count = int_or_none(node.get('video_view_count'))
+
+ info.update({
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'comment_count': comment_count,
+ 'like_count': like_count,
+ 'view_count': view_count,
+ })
+
+ yield info
+
+ page_info = media.get('page_info')
+ if not page_info or not isinstance(page_info, dict):
+ break
+
+ has_next_page = page_info.get('has_next_page')
+ if not has_next_page:
+ break
+
+ cursor = page_info.get('end_cursor')
+ if not cursor or not isinstance(cursor, compat_str):
+ break
+
+ def _real_extract(self, url):
+ user_or_tag = self._match_id(url)
+ webpage = self._download_webpage(url, user_or_tag)
+ data = self._parse_graphql(webpage, user_or_tag)
+
+ self._set_cookie('instagram.com', 'ig_pr', '1')
+
+ return self.playlist_result(
+ self._extract_graphql(data, url), user_or_tag, user_or_tag)
+
+
+class InstagramUserIE(InstagramPlaylistIE):
+ _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
+ IE_DESC = 'Instagram user profile'
+ IE_NAME = 'instagram:user'
+ _TEST = {
+ 'url': 'https://instagram.com/porsche',
+ 'info_dict': {
+ 'id': 'porsche',
+ 'title': 'porsche',
+ },
+ 'playlist_count': 5,
+ 'params': {
+ 'extract_flat': True,
+ 'skip_download': True,
+ 'playlistend': 5,
+ }
+ }
+
+ _QUERY_HASH = '42323d64886122307be10013ad2dcc44'
+
+ @staticmethod
+ def _parse_timeline_from(data):
+ # extracts the media timeline data from a GraphQL result
+ return data['data']['user']['edge_owner_to_timeline_media']
+
+ @staticmethod
+ def _query_vars_for(data):
+ # returns a dictionary of variables to add to the timeline query,
+ # based on the GraphQL data embedded in the original page
+ return {
+ 'id': data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
+ }
+
+
+class InstagramTagIE(InstagramPlaylistIE):
+ _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)'
+ IE_DESC = 'Instagram hashtag search'
+ IE_NAME = 'instagram:tag'
+ _TEST = {
+ 'url': 'https://instagram.com/explore/tags/lolcats',
+ 'info_dict': {
+ 'id': 'lolcats',
+ 'title': 'lolcats',
+ },
+ 'playlist_count': 50,
+ 'params': {
+ 'extract_flat': True,
+ 'skip_download': True,
+ 'playlistend': 50,
+ }
+ }
+
+ _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314'
+
+ @staticmethod
+ def _parse_timeline_from(data):
+ # extracts the media timeline data from a GraphQL result
+ return data['data']['hashtag']['edge_hashtag_to_media']
+
+ @staticmethod
+ def _query_vars_for(data):
+ # returns a dictionary of variables to add to the timeline query,
+ # based on the GraphQL data embedded in the original page
+ return {
+ 'tag_name':
+ data['entry_data']['TagPage'][0]['graphql']['hashtag']['name']
+ }
diff --git a/hypervideo_dl/extractor/internazionale.py b/hypervideo_dl/extractor/internazionale.py
new file mode 100644
index 0000000..676e8e2
--- /dev/null
+++ b/hypervideo_dl/extractor/internazionale.py
@@ -0,0 +1,87 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import unified_timestamp
+
+
+class InternazionaleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood',
+ 'md5': '3e39d32b66882c1218e305acbf8348ca',
+ 'info_dict': {
+ 'id': '265968',
+ 'display_id': 'richard-linklater-racconta-una-scena-di-boyhood',
+ 'ext': 'mp4',
+ 'title': 'Richard Linklater racconta una scena di Boyhood',
+ 'description': 'md5:efb7e5bbfb1a54ae2ed5a4a015f0e665',
+ 'timestamp': 1424354635,
+ 'upload_date': '20150219',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }, {
+ 'url': 'https://www.internazionale.it/video/2018/08/29/telefono-stare-con-noi-stessi',
+ 'md5': '9db8663704cab73eb972d1cee0082c79',
+ 'info_dict': {
+ 'id': '761344',
+ 'display_id': 'telefono-stare-con-noi-stessi',
+ 'ext': 'mp4',
+ 'title': 'Usiamo il telefono per evitare di stare con noi stessi',
+ 'description': 'md5:75ccfb0d6bcefc6e7428c68b4aa1fe44',
+ 'timestamp': 1535528954,
+ 'upload_date': '20180829',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ DATA_RE = r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1'
+
+ title = self._search_regex(
+ DATA_RE % 'video-title', webpage, 'title', default=None,
+ group='value') or self._og_search_title(webpage)
+
+ video_id = self._search_regex(
+ DATA_RE % 'job-id', webpage, 'video id', group='value')
+ video_path = self._search_regex(
+ DATA_RE % 'video-path', webpage, 'video path', group='value')
+ video_available_abroad = self._search_regex(
+ DATA_RE % 'video-available_abroad', webpage,
+ 'video available abroad', default='1', group='value')
+ video_available_abroad = video_available_abroad == '1'
+
+ video_base = 'https://video%s.internazionale.it/%s/%s.' % \
+ ('' if video_available_abroad else '-ita', video_path, video_id)
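+ # builds e.g. https://video.internazionale.it/<path>/<id>. (manifest
+ # suffix appended below); the '-ita' host serves geo-restricted clips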
+
+ formats = self._extract_m3u8_formats(
+ video_base + 'm3u8', display_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(self._extract_mpd_formats(
+ video_base + 'mpd', display_id, mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
+
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'article:published_time', webpage, 'timestamp'))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage),
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/internetvideoarchive.py b/hypervideo_dl/extractor/internetvideoarchive.py
new file mode 100644
index 0000000..59b0a90
--- /dev/null
+++ b/hypervideo_dl/extractor/internetvideoarchive.py
@@ -0,0 +1,66 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+
+
+class InternetVideoArchiveIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'
+
+ _TEST = {
+ 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
+ 'info_dict': {
+ 'id': '194487',
+ 'ext': 'mp4',
+ 'title': 'Kick-Ass 2',
+ 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ @staticmethod
+ def _build_json_url(query):
+ return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
+
+ def _real_extract(self, url):
+ query = compat_parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = query['publishedid'][0]
+ data = self._download_json(
+ 'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx',
+ video_id, data=json.dumps({
+ 'customerid': query['customerid'][0],
+ 'publishedid': video_id,
+ }).encode())
+ title = data['Title']
+ formats = self._extract_m3u8_formats(
+ data['VideoUrl'], video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ file_url = formats[0]['url']
+ if '.ism/' in file_url:
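+ # rewrite everything after '.ism/' to probe the matching HDS, DASH and
+ # MSS manifests for the same stream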
+ replace_url = lambda x: re.sub(r'\.ism/[^?]+', '.ism/' + x, file_url)
+ formats.extend(self._extract_f4m_formats(
+ replace_url('.f4m'), video_id, f4m_id='hds', fatal=False))
+ formats.extend(self._extract_mpd_formats(
+ replace_url('.mpd'), video_id, mpd_id='dash', fatal=False))
+ formats.extend(self._extract_ism_formats(
+ replace_url('Manifest'), video_id, ism_id='mss', fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': data.get('PosterUrl'),
+ 'description': data.get('Description'),
+ }
diff --git a/hypervideo_dl/extractor/iprima.py b/hypervideo_dl/extractor/iprima.py
new file mode 100644
index 0000000..648ae67
--- /dev/null
+++ b/hypervideo_dl/extractor/iprima.py
@@ -0,0 +1,151 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ js_to_json,
+)
+
+
+class IPrimaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _GEO_BYPASS = False
+
+ _TESTS = [{
+ 'url': 'https://prima.iprima.cz/particka/92-epizoda',
+ 'info_dict': {
+ 'id': 'p51388',
+ 'ext': 'mp4',
+ 'title': 'Partička (92)',
+ 'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ }, {
+ 'url': 'https://cnn.iprima.cz/videa/70-epizoda',
+ 'info_dict': {
+ 'id': 'p681554',
+ 'ext': 'mp4',
+ 'title': 'HLAVNÍ ZPRÁVY 3.5.2020',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ }, {
+ 'url': 'http://play.iprima.cz/particka/particka-92',
+ 'only_matching': True,
+ }, {
+ # geo restricted
+ 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
+ 'only_matching': True,
+ }, {
+ # iframe api.play-backend.iprima.cz
+ 'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
+ 'only_matching': True,
+ }, {
+ # iframe prima.iprima.cz
+ 'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.iprima.cz/filmy/desne-rande',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://autosalon.iprima.cz/motorsport/7-epizoda-1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1')
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._search_regex(
+ r'<h1>([^<]+)', webpage, 'title')
+
+ video_id = self._search_regex(
+ (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
+ r'data-product="([^"]+)">',
+ r'id=["\']player-(p\d+)"',
+ r'playerId\s*:\s*["\']player-(p\d+)',
+ r'\bvideos\s*=\s*["\'](p\d+)'),
+ webpage, 'real id')
+
+ playerpage = self._download_webpage(
+ 'http://play.iprima.cz/prehravac/init',
+ video_id, note='Downloading player', query={
+ '_infuse': 1,
+ '_ts': round(time.time()),
+ 'productId': video_id,
+ }, headers={'Referer': url})
+
+ formats = []
+
+ def extract_formats(format_url, format_key=None, lang=None):
+ ext = determine_ext(format_url)
+ new_formats = []
+ if format_key == 'hls' or ext == 'm3u8':
+ new_formats = self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ elif format_key == 'dash' or ext == 'mpd':
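+ # the early return below skips DASH extraction entirely; the
+ # _extract_mpd_formats call after it is unreachable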
+ return
+ new_formats = self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False)
+ if lang:
+ for f in new_formats:
+ if not f.get('language'):
+ f['language'] = lang
+ formats.extend(new_formats)
+
+ options = self._parse_json(
+ self._search_regex(
+ r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
+ playerpage, 'player options', default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+ if options:
+ for key, tracks in options.get('tracks', {}).items():
+ if not isinstance(tracks, list):
+ continue
+ for track in tracks:
+ src = track.get('src')
+ if src:
+ extract_formats(src, key.lower(), track.get('lang'))
+
+ if not formats:
+ for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
+ extract_formats(src)
+
+ if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
+ self.raise_geo_restricted(countries=['CZ'])
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'formats': formats,
+ 'description': self._og_search_description(webpage, default=None),
+ }
diff --git a/hypervideo_dl/extractor/iqiyi.py b/hypervideo_dl/extractor/iqiyi.py
new file mode 100644
index 0000000..6df5214
--- /dev/null
+++ b/hypervideo_dl/extractor/iqiyi.py
@@ -0,0 +1,220 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import itertools
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+)
+from ..utils import (
+ clean_html,
+ get_element_by_id,
+ get_element_by_attribute,
+ ExtractorError,
+ ohdave_rsa_encrypt,
+ remove_start,
+)
+
+
+def md5_text(text):
+ return hashlib.md5(text.encode('utf-8')).hexdigest()
+
+
+class IqiyiIE(InfoExtractor):
+ IE_NAME = 'iqiyi'
+ IE_DESC = '爱奇艺'
+
+ _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'
+
+ _NETRC_MACHINE = 'iqiyi'
+
+ _TESTS = [{
+ 'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
+ # MD5 checksum differs on my machine and Travis CI
+ 'info_dict': {
+ 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
+ 'ext': 'mp4',
+ 'title': '美国德州空中惊现奇异云团 酷似UFO',
+ }
+ }, {
+ 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
+ 'md5': 'b7dc800a4004b1b57749d9abae0472da',
+ 'info_dict': {
+ 'id': 'e3f585b550a280af23c98b6cb2be19fb',
+ 'ext': 'mp4',
+ # This can be either Simplified Chinese or Traditional Chinese
+ 'title': r're:^(?:名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇|名偵探柯南 國語版:第752集 迫近灰原秘密的黑影 下篇)$',
+ },
+ 'skip': 'Geo-restricted to China',
+ }, {
+ 'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://yule.iqiyi.com/pcb.html',
+ 'info_dict': {
+ 'id': '4a0af228fddb55ec96398a364248ed7f',
+ 'ext': 'mp4',
+ 'title': '第2017-04-21期 女艺人频遭极端粉丝骚扰',
+ },
+ }, {
+ # VIP-only video. The first 2 parts (6 minutes) are available without login
+ # MD5 sums omitted as values are different on Travis CI and my machine
+ 'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
+ 'info_dict': {
+ 'id': 'f3cf468b39dddb30d676f89a91200dc1',
+ 'ext': 'mp4',
+ 'title': '泰坦尼克号',
+ },
+ 'skip': 'Geo-restricted to China',
+ }, {
+ 'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
+ 'info_dict': {
+ 'id': '202918101',
+ 'title': '灌篮高手 国语版',
+ },
+ 'playlist_count': 101,
+ }, {
+ 'url': 'http://www.pps.tv/w_19rrbav0ph.html',
+ 'only_matching': True,
+ }]
+
+ _FORMATS_MAP = {
+ '96': 1, # 216p, 240p
+ '1': 2, # 336p, 360p
+ '2': 3, # 480p, 504p
+ '21': 4, # 504p
+ '4': 5, # 720p
+ '17': 5, # 720p
+ '5': 6, # 1072p, 1080p
+ '18': 7, # 1080p
+ }
+
+ def _real_initialize(self):
+ self._login()
+
+ @staticmethod
+ def _rsa_fun(data):
+ # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js
+ N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
+ e = 65537
+
+ return ohdave_rsa_encrypt(data, e, N)
+
+ def _login(self):
+ raise ExtractorError("iQiyi's non-free authentication algorithm has made login impossible", expected=True)
+
+ def get_raw_data(self, tvid, video_id):
+ tm = int(time.time() * 1000)
+
+ key = 'd5fb4bd9d50c4be6948c97edd7254b0e'
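+ # request signature: md5 of the millisecond timestamp + static key + tvid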
+ sc = md5_text(compat_str(tm) + key + tvid)
+ params = {
+ 'tvid': tvid,
+ 'vid': video_id,
+ 'src': '76f90cbd92f94a2e925d83e8ccd22cb7',
+ 'sc': sc,
+ 't': tm,
+ }
+
+ return self._download_json(
+ 'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id),
+ video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='),
+ query=params, headers=self.geo_verification_headers())
+
+ def _extract_playlist(self, webpage):
+ PAGE_SIZE = 50
+
+ links = re.findall(
+ r'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"',
+ webpage)
+ if not links:
+ return
+
+ album_id = self._search_regex(
+ r'albumId\s*:\s*(\d+),', webpage, 'album ID')
+ album_title = self._search_regex(
+ r'data-share-title="([^"]+)"', webpage, 'album title', fatal=False)
+
+ entries = list(map(self.url_result, links))
+
+ # Start from page 2 because the links on the first page are already in webpage
+ for page_num in itertools.count(2):
+ pagelist_page = self._download_webpage(
+ 'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id, page_num, PAGE_SIZE),
+ album_id,
+ note='Download playlist page %d' % page_num,
+ errnote='Failed to download playlist page %d' % page_num)
+ pagelist = self._parse_json(
+ remove_start(pagelist_page, 'var tvInfoJs='), album_id)
+ vlist = pagelist['data']['vlist']
+ for item in vlist:
+ entries.append(self.url_result(item['vurl']))
+ if len(vlist) < PAGE_SIZE:
+ break
+
+ return self.playlist_result(entries, album_id, album_title)
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(
+ url, 'temp_id', note='download video page')
+
+ # There's no simple way to determine whether a URL is a playlist or not.
+ # Individual video pages sometimes contain playlist links, so treat the
+ # URL as a single video first
+ tvid = self._search_regex(
+ r'data-(?:player|shareplattrigger)-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid', default=None)
+ if tvid is None:
+ playlist_result = self._extract_playlist(webpage)
+ if playlist_result:
+ return playlist_result
+ raise ExtractorError('Can\'t find any video')
+
+ video_id = self._search_regex(
+ r'data-(?:player|shareplattrigger)-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
+
+ formats = []
+ for _ in range(5):
+ raw_data = self.get_raw_data(tvid, video_id)
+
+ if raw_data['code'] != 'A00000':
+ if raw_data['code'] == 'A00111':
+ self.raise_geo_restricted()
+ raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
+
+ data = raw_data['data']
+
+ for stream in data['vidl']:
+ if 'm3utx' not in stream:
+ continue
+ vd = compat_str(stream['vd'])
+ formats.append({
+ 'url': stream['m3utx'],
+ 'format_id': vd,
+ 'ext': 'mp4',
+ 'preference': self._FORMATS_MAP.get(vd, -1),
+ 'protocol': 'm3u8_native',
+ })
+
+ if formats:
+ break
+
+ self._sleep(5, video_id)
+
+ self._sort_formats(formats)
+ title = (get_element_by_id('widget-videotitle', webpage)
+ or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage))
+ or self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/ir90tv.py b/hypervideo_dl/extractor/ir90tv.py
new file mode 100644
index 0000000..d5a3f6f
--- /dev/null
+++ b/hypervideo_dl/extractor/ir90tv.py
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import remove_start
+
+
+class Ir90TvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?90tv\.ir/video/(?P<id>[0-9]+)/.*'
+ _TESTS = [{
+ 'url': 'http://90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
+ 'md5': '411dbd94891381960cb9e13daa47a869',
+ 'info_dict': {
+ 'id': '95719',
+ 'ext': 'mp4',
+ 'title': 'شایعات نقل و انتقالات مهم فوتبال اروپا 94/02/18',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'http://www.90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = remove_start(self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title'), '90tv.ir :: ')
+
+ video_url = self._search_regex(
+ r'<source[^>]+src="([^"]+)"', webpage, 'video url')
+
+ thumbnail = self._search_regex(r'poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)
+
+ return {
+ 'url': video_url,
+ 'id': video_id,
+ 'title': title,
+ 'video_url': video_url,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/itv.py b/hypervideo_dl/extractor/itv.py
new file mode 100644
index 0000000..e86c40b
--- /dev/null
+++ b/hypervideo_dl/extractor/itv.py
@@ -0,0 +1,187 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
+from ..utils import (
+ clean_html,
+ determine_ext,
+ extract_attributes,
+ get_element_by_class,
+ JSON_LD_RE,
+ merge_dicts,
+ parse_duration,
+ smuggle_url,
+ url_or_none,
+)
+
+
+class ITVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
+ _GEO_COUNTRIES = ['GB']
+ _TESTS = [{
+ 'url': 'https://www.itv.com/hub/liar/2a4547a0012',
+ 'info_dict': {
+ 'id': '2a4547a0012',
+ 'ext': 'mp4',
+ 'title': 'Liar - Series 2 - Episode 6',
+ 'description': 'md5:d0f91536569dec79ea184f0a44cca089',
+ 'series': 'Liar',
+ 'season_number': 2,
+ 'episode_number': 6,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # unavailable via data-playlist-url
+ 'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
+ 'only_matching': True,
+ }, {
+ # InvalidVodcrid
+ 'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
+ 'only_matching': True,
+ }, {
+ # ContentUnavailable
+ 'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ params = extract_attributes(self._search_regex(
+ r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
+
+ ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
+ hmac = params['data-video-hmac']
+ headers = self.geo_verification_headers()
+ headers.update({
+ 'Accept': 'application/vnd.itv.vod.playlist.v2+json',
+ 'Content-Type': 'application/json',
+ 'hmac': hmac.upper(),
+ })
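+ # the playlist endpoint expects a full device/client descriptor; send a
+ # minimal desktop profile requesting HLS with AES and out-of-band WebVTT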
+ ios_playlist = self._download_json(
+ ios_playlist_url, video_id, data=json.dumps({
+ 'user': {
+ 'itvUserId': '',
+ 'entitlements': [],
+ 'token': ''
+ },
+ 'device': {
+ 'manufacturer': 'Safari',
+ 'model': '5',
+ 'os': {
+ 'name': 'Windows NT',
+ 'version': '6.1',
+ 'type': 'desktop'
+ }
+ },
+ 'client': {
+ 'version': '4.1',
+ 'id': 'browser'
+ },
+ 'variantAvailability': {
+ 'featureset': {
+ 'min': ['hls', 'aes', 'outband-webvtt'],
+ 'max': ['hls', 'aes', 'outband-webvtt']
+ },
+ 'platformTag': 'dotcom'
+ }
+ }).encode(), headers=headers)
+ video_data = ios_playlist['Playlist']['Video']
+ ios_base_url = video_data.get('Base')
+
+ formats = []
+ for media_file in (video_data.get('MediaFiles') or []):
+ href = media_file.get('Href')
+ if not href:
+ continue
+ if ios_base_url:
+ href = ios_base_url + href
+ ext = determine_ext(href)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ href, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': href,
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ subs = video_data.get('Subtitles') or []
+ for sub in subs:
+ if not isinstance(sub, dict):
+ continue
+ href = url_or_none(sub.get('Href'))
+ if not href:
+ continue
+ subtitles.setdefault('en', []).append({
+ 'url': href,
+ 'ext': determine_ext(href, 'vtt'),
+ })
+
+ info = self._search_json_ld(webpage, video_id, default={})
+ if not info:
+ json_ld = self._parse_json(self._search_regex(
+ JSON_LD_RE, webpage, 'JSON-LD', '{}',
+ group='json_ld'), video_id, fatal=False)
+ if json_ld and json_ld.get('@type') == 'BreadcrumbList':
+ for ile in (json_ld.get('itemListElement') or []):
+ item = ile.get('item') or {}
+ if item.get('@type') == 'TVEpisode':
+ item['@context'] = 'http://schema.org'
+ info = self._json_ld(item, video_id, fatal=False) or {}
+ break
+
+ return merge_dicts({
+ 'id': video_id,
+ 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'duration': parse_duration(video_data.get('Duration')),
+ 'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
+ }, info)
+
+
+class ITVBTCCIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
+ 'info_dict': {
+ 'id': 'btcc-2018-all-the-action-from-brands-hatch',
+ 'title': 'BTCC 2018: All the action from Brands Hatch',
+ },
+ 'playlist_mincount': 9,
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result(
+ smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
+ # ITV does not like some GB IP ranges, so here are some
+ # IP blocks it accepts
+ 'geo_ip_blocks': [
+ '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
+ ],
+ 'referrer': url,
+ }),
+ ie=BrightcoveNewIE.ie_key(), video_id=video_id)
+ for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
+
+ title = self._og_search_title(webpage, fatal=False)
+
+ return self.playlist_result(entries, playlist_id, title)
diff --git a/hypervideo_dl/extractor/ivi.py b/hypervideo_dl/extractor/ivi.py
new file mode 100644
index 0000000..04c54e8
--- /dev/null
+++ b/hypervideo_dl/extractor/ivi.py
@@ -0,0 +1,275 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+import sys
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ qualities,
+)
+
+
+class IviIE(InfoExtractor):
+ IE_DESC = 'ivi.ru'
+ IE_NAME = 'ivi'
+ _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['RU']
+ _LIGHT_KEY = b'\xf1\x02\x32\xb7\xbc\x5c\x7a\xe8\xf7\x96\xc1\x33\x2b\x27\xa1\x8c'
+ _LIGHT_URL = 'https://api.ivi.ru/light/'
+
+ _TESTS = [
+ # Single movie
+ {
+ 'url': 'http://www.ivi.ru/watch/53141',
+ 'md5': '6ff5be2254e796ed346251d117196cf4',
+ 'info_dict': {
+ 'id': '53141',
+ 'ext': 'mp4',
+ 'title': 'Иван Васильевич меняет профессию',
+ 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
+ 'duration': 5498,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'Only works from Russia',
+ },
+ # Serial's series
+ {
+ 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
+ 'md5': '221f56b35e3ed815fde2df71032f4b3e',
+ 'info_dict': {
+ 'id': '9549',
+ 'ext': 'mp4',
+ 'title': 'Двое из ларца - Дело Гольдберга (1 часть)',
+ 'series': 'Двое из ларца',
+ 'season': 'Сезон 1',
+ 'season_number': 1,
+ 'episode': 'Дело Гольдберга (1 часть)',
+ 'episode_number': 1,
+ 'duration': 2655,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'Only works from Russia',
+ },
+ {
+ # with MP4-HD720 format
+ 'url': 'http://www.ivi.ru/watch/146500',
+ 'md5': 'd63d35cdbfa1ea61a5eafec7cc523e1e',
+ 'info_dict': {
+ 'id': '146500',
+ 'ext': 'mp4',
+ 'title': 'Кукла',
+ 'description': 'md5:ffca9372399976a2d260a407cc74cce6',
+ 'duration': 5599,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'Only works from Russia',
+ },
+ {
+ 'url': 'https://www.ivi.tv/watch/33560/',
+ 'only_matching': True,
+ },
+ ]
+
+ # Sorted by quality
+ _KNOWN_FORMATS = (
+ 'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi',
+ 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
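+ # note the '%d' placeholder: the JSON string is %-formatted below with
+ # the site id (353 for the signed API, 183 as the fallback)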
+ data = json.dumps({
+ 'method': 'da.content.get',
+ 'params': [
+ video_id, {
+ 'site': 's%d',
+ 'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
+ 'contentid': video_id
+ }
+ ]
+ })
+
+ bundled = hasattr(sys, 'frozen')
+
+ for site in (353, 183):
+ content_data = (data % site).encode()
+ if site == 353:
+ if bundled:
+ continue
+ try:
+ from Cryptodome.Cipher import Blowfish
+ from Cryptodome.Hash import CMAC
+ pycryptodomex_found = True
+ except ImportError:
+ pycryptodomex_found = False
+ continue
+
+ timestamp = (self._download_json(
+ self._LIGHT_URL, video_id,
+ 'Downloading timestamp JSON', data=json.dumps({
+ 'method': 'da.timestamp.get',
+ 'params': []
+ }).encode(), fatal=False) or {}).get('result')
+ if not timestamp:
+ continue
+
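+ # sign the request with Blowfish-CMAC over timestamp + payload, keyed
+ # with the hard-coded _LIGHT_KEY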
+ query = {
+ 'ts': timestamp,
+ 'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + content_data, Blowfish).hexdigest(),
+ }
+ else:
+ query = {}
+
+ video_json = self._download_json(
+ self._LIGHT_URL, video_id,
+ 'Downloading video JSON', data=content_data, query=query)
+
+ error = video_json.get('error')
+ if error:
+ origin = error.get('origin')
+ message = error.get('message') or error.get('user_message')
+ extractor_msg = 'Unable to download video %s'
+ if origin == 'NotAllowedForLocation':
+ self.raise_geo_restricted(message, self._GEO_COUNTRIES)
+ elif origin == 'NoRedisValidData':
+ extractor_msg = 'Video %s does not exist'
+ elif site == 353:
+ continue
+ elif bundled:
+ raise ExtractorError(
+ 'This feature does not work from bundled exe. Run hypervideo from sources.',
+ expected=True)
+ elif not pycryptodomex_found:
+ raise ExtractorError(
+ 'pycryptodomex not found. Please install it.',
+ expected=True)
+ elif message:
+ extractor_msg += ': ' + message
+ raise ExtractorError(extractor_msg % video_id, expected=True)
+ else:
+ break
+
+ result = video_json['result']
+ title = result['title']
+
+ quality = qualities(self._KNOWN_FORMATS)
+
+ formats = []
+ for f in result.get('files', []):
+ f_url = f.get('url')
+ content_format = f.get('content_format')
+ if not f_url or '-MDRM-' in content_format or '-FPS-' in content_format:
+ continue
+ formats.append({
+ 'url': f_url,
+ 'format_id': content_format,
+ 'quality': quality(content_format),
+ 'filesize': int_or_none(f.get('size_in_bytes')),
+ })
+ self._sort_formats(formats)
+
+ compilation = result.get('compilation')
+ episode = title if compilation else None
+
+ title = '%s - %s' % (compilation, title) if compilation is not None else title
+
+ thumbnails = [{
+ 'url': preview['url'],
+ 'id': preview.get('content_format'),
+ } for preview in result.get('preview', []) if preview.get('url')]
+
+ webpage = self._download_webpage(url, video_id)
+
+ season = self._search_regex(
+ r'<li[^>]+class="season active"[^>]*><a[^>]+>([^<]+)',
+ webpage, 'season', default=None)
+ season_number = int_or_none(self._search_regex(
+ r'<li[^>]+class="season active"[^>]*><a[^>]+data-season(?:-index)?="(\d+)"',
+ webpage, 'season number', default=None))
+
+ episode_number = int_or_none(self._search_regex(
+ r'[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
+ webpage, 'episode number', default=None))
+
+ description = self._og_search_description(webpage, default=None) or self._html_search_meta(
+ 'description', webpage, 'description', default=None)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'series': compilation,
+ 'season': season,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'thumbnails': thumbnails,
+ 'description': description,
+ 'duration': int_or_none(result.get('duration')),
+ 'formats': formats,
+ }
+
+
+class IviCompilationIE(InfoExtractor):
+ IE_DESC = 'ivi.ru compilations'
+ IE_NAME = 'ivi:compilation'
+ _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+ _TESTS = [{
+ 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
+ 'info_dict': {
+ 'id': 'dvoe_iz_lartsa',
+ 'title': 'Двое из ларца (2006 - 2008)',
+ },
+ 'playlist_mincount': 24,
+ }, {
+ 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
+ 'info_dict': {
+ 'id': 'dvoe_iz_lartsa/season1',
+ 'title': 'Двое из ларца (2006 - 2008) 1 сезон',
+ },
+ 'playlist_mincount': 12,
+ }]
+
+ def _extract_entries(self, html, compilation_id):
+ return [
+ self.url_result(
+ 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key())
+ for serie in re.findall(
+ r'<a\b[^>]+\bhref=["\']/watch/%s/(\d+)["\']' % compilation_id, html)]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ compilation_id = mobj.group('compilationid')
+ season_id = mobj.group('seasonid')
+
+ if season_id is not None: # Season link
+ season_page = self._download_webpage(
+ url, compilation_id, 'Downloading season %s web page' % season_id)
+ playlist_id = '%s/season%s' % (compilation_id, season_id)
+ playlist_title = self._html_search_meta('title', season_page, 'title')
+ entries = self._extract_entries(season_page, compilation_id)
+ else: # Compilation link
+ compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
+ playlist_id = compilation_id
+ playlist_title = self._html_search_meta('title', compilation_page, 'title')
+ seasons = re.findall(
+ r'<a href="/watch/%s/season(\d+)' % compilation_id, compilation_page)
+ if not seasons: # No seasons in this compilation
+ entries = self._extract_entries(compilation_page, compilation_id)
+ else:
+ entries = []
+ for season_id in seasons:
+ season_page = self._download_webpage(
+ 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
+ compilation_id, 'Downloading season %s web page' % season_id)
+ entries.extend(self._extract_entries(season_page, compilation_id))
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/hypervideo_dl/extractor/ivideon.py b/hypervideo_dl/extractor/ivideon.py
new file mode 100644
index 0000000..3ca824f
--- /dev/null
+++ b/hypervideo_dl/extractor/ivideon.py
@@ -0,0 +1,85 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlencode,
+ compat_urlparse,
+)
+from ..utils import qualities
+
+
+class IvideonIE(InfoExtractor):
+ IE_NAME = 'ivideon'
+ IE_DESC = 'Ivideon TV'
+ _VALID_URL = r'https?://(?:www\.)?ivideon\.com/tv/(?:[^/]+/)*camera/(?P<id>\d+-[\da-f]+)/(?P<camera_id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.ivideon.com/tv/camera/100-916ca13b5c4ad9f564266424a026386d/0/',
+ 'info_dict': {
+ 'id': '100-916ca13b5c4ad9f564266424a026386d',
+ 'ext': 'flv',
+ 'title': 're:^Касса [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'Основное предназначение - запись действий кассиров. Плюс общий вид.',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.ivideon.com/tv/camera/100-c4ee4cb9ede885cf62dfbe93d7b53783/589824/?lang=ru',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ivideon.com/tv/map/22.917923/-31.816406/16/camera/100-e7bc16c7d4b5bbd633fd5350b66dfa9a/0',
+ 'only_matching': True,
+ }]
+
+ _QUALITIES = ('low', 'mid', 'hi')
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ server_id, camera_id = mobj.group('id'), mobj.group('camera_id')
+ camera_name, description = None, None
+ camera_url = compat_urlparse.urljoin(
+ url, '/tv/camera/%s/%s/' % (server_id, camera_id))
+
+ webpage = self._download_webpage(camera_url, server_id, fatal=False)
+ if webpage:
+ config_string = self._search_regex(
+ r'var\s+config\s*=\s*({.+?});', webpage, 'config', default=None)
+ if config_string:
+ config = self._parse_json(config_string, server_id, fatal=False)
+ camera_info = config.get('ivTvAppOptions', {}).get('currentCameraInfo')
+ if camera_info:
+ camera_name = camera_info.get('camera_name')
+ description = camera_info.get('misc', {}).get('description')
+ if not camera_name:
+ camera_name = self._html_search_meta(
+ 'name', webpage, 'camera name', default=None) or self._search_regex(
+ r'<h1[^>]+class="b-video-title"[^>]*>([^<]+)', webpage, 'camera name', default=None)
+
+ quality = qualities(self._QUALITIES)
+
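+ # one live FLV stream per quality level, all requested through the
+ # streaming endpoint with the public 'demo' session id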
+ formats = [{
+ 'url': 'https://streaming.ivideon.com/flv/live?%s' % compat_urllib_parse_urlencode({
+ 'server': server_id,
+ 'camera': camera_id,
+ 'sessionId': 'demo',
+ 'q': quality(format_id),
+ }),
+ 'format_id': format_id,
+ 'ext': 'flv',
+ 'quality': quality(format_id),
+ } for format_id in self._QUALITIES]
+ self._sort_formats(formats)
+
+ return {
+ 'id': server_id,
+ 'title': self._live_title(camera_name or server_id),
+ 'description': description,
+ 'is_live': True,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/iwara.py b/hypervideo_dl/extractor/iwara.py
new file mode 100644
index 0000000..907d5fc
--- /dev/null
+++ b/hypervideo_dl/extractor/iwara.py
@@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
+from ..utils import (
+ int_or_none,
+ mimetype2ext,
+ remove_end,
+ url_or_none,
+)
+
+
+class IwaraIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
+ # md5 is unstable
+ 'info_dict': {
+ 'id': 'amVwUl1EHpAD9RD',
+ 'ext': 'mp4',
+ 'title': '【MMD R-18】ガールフレンド carry_me_off',
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO',
+ 'md5': '7e5f1f359cd51a027ba4a7b7710a50f0',
+ 'info_dict': {
+ 'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
+ 'ext': 'mp4',
+ 'title': '[3D Hentai] Kyonyu × Genkai × Emaki Shinobi Girls.mp4',
+ 'age_limit': 18,
+ },
+ 'add_ie': ['GoogleDrive'],
+ }, {
+ 'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
+ # md5 is unstable
+ 'info_dict': {
+ 'id': '6liAP9s2Ojc',
+ 'ext': 'mp4',
+ 'age_limit': 18,
+ 'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
+ 'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
+ 'upload_date': '20160910',
+ 'uploader': 'aMMDsork',
+ 'uploader_id': 'UCVOFyOSCyFkXTYYHITtqB7A',
+ },
+ 'add_ie': ['Youtube'],
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ hostname = compat_urllib_parse_urlparse(urlh.geturl()).hostname
+ # ecchi is 'sexy' in Japanese
+ age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0
+
+ video_data = self._download_json('http://www.iwara.tv/api/video/%s' % video_id, video_id)
+
+ if not video_data:
+ iframe_url = self._html_search_regex(
+ r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
+ webpage, 'iframe URL', group='url')
+ return {
+ '_type': 'url_transparent',
+ 'url': iframe_url,
+ 'age_limit': age_limit,
+ }
+
+ title = remove_end(self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
+
+ formats = []
+ for a_format in video_data:
+ format_uri = url_or_none(a_format.get('uri'))
+ if not format_uri:
+ continue
+ format_id = a_format.get('resolution')
+ height = int_or_none(self._search_regex(
+ r'(\d+)p', format_id, 'height', default=None))
+ formats.append({
+ 'url': self._proto_relative_url(format_uri, 'https:'),
+ 'format_id': format_id,
+ 'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
+ 'height': height,
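+ # width is not reported by the API; assume a 16:9 aspect ratio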
+ 'width': int_or_none(height / 9.0 * 16.0 if height else None),
+ 'quality': 1 if format_id == 'Source' else 0,
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/izlesene.py b/hypervideo_dl/extractor/izlesene.py
new file mode 100644
index 0000000..f8fca6c
--- /dev/null
+++ b/hypervideo_dl/extractor/izlesene.py
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ get_element_by_id,
+ int_or_none,
+ parse_iso8601,
+ str_to_int,
+)
+
+
+class IzleseneIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://(?:(?:www|m)\.)?izlesene\.com/
+ (?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)
+ '''
+ _TESTS = [
+ {
+ 'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
+ 'md5': '4384f9f0ea65086734b881085ee05ac2',
+ 'info_dict': {
+ 'id': '7599694',
+ 'ext': 'mp4',
+ 'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
+ 'description': 'md5:253753e2655dde93f59f74b572454f6d',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'uploader_id': 'pelikzzle',
+ 'timestamp': int,
+ 'upload_date': '20140702',
+ 'duration': 95.395,
+ 'age_limit': 0,
+ }
+ },
+ {
+ 'url': 'http://www.izlesene.com/video/tarkan-dortmund-2006-konseri/17997',
+ 'md5': '97f09b6872bffa284cb7fa4f6910cb72',
+ 'info_dict': {
+ 'id': '17997',
+ 'ext': 'mp4',
+ 'title': 'Tarkan Dortmund 2006 Konseri',
+ 'thumbnail': r're:^https://.*\.jpg',
+ 'uploader_id': 'parlayankiz',
+ 'timestamp': int,
+ 'upload_date': '20061112',
+ 'duration': 253.666,
+ 'age_limit': 0,
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage('http://www.izlesene.com/video/%s' % video_id, video_id)
+
+ video = self._parse_json(
+ self._search_regex(
+ r'videoObj\s*=\s*({.+?})\s*;\s*\n', webpage, 'streams'),
+ video_id)
+
+ title = video.get('videoTitle') or self._og_search_title(webpage)
+
+ formats = []
+ for stream in video['media']['level']:
+ source_url = stream.get('source')
+ if not source_url or not isinstance(source_url, compat_str):
+ continue
+ ext = determine_ext(source_url, 'mp4')
+ quality = stream.get('value')
+ height = int_or_none(quality)
+ formats.append({
+ 'format_id': '%sp' % quality if quality else 'sd',
+ 'url': compat_urllib_parse_unquote(source_url),
+ 'ext': ext,
+ 'height': height,
+ })
+ self._sort_formats(formats)
+
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = video.get('posterURL') or self._proto_relative_url(
+ self._og_search_thumbnail(webpage), scheme='http:')
+
+ uploader = self._html_search_regex(
+ r"adduserUsername\s*=\s*'([^']+)';",
+ webpage, 'uploader', fatal=False)
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'uploadDate', webpage, 'upload date'))
+
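+ # durations are given in milliseconds; scale=1000 converts to seconds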
+ duration = float_or_none(video.get('duration') or self._html_search_regex(
+ r'videoduration["\']?\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'duration', fatal=False, group='value'), scale=1000)
+
+ view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
+ comment_count = self._html_search_regex(
+ r'comment_count\s*=\s*\'([^\']+)\';',
+ webpage, 'comment_count', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader_id': uploader,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'view_count': int_or_none(view_count),
+ 'comment_count': int_or_none(comment_count),
+ 'age_limit': self._family_friendly_search(webpage),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/jamendo.py b/hypervideo_dl/extractor/jamendo.py
new file mode 100644
index 0000000..1db7c64
--- /dev/null
+++ b/hypervideo_dl/extractor/jamendo.py
@@ -0,0 +1,198 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import random
+
+from ..compat import compat_str
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ try_get,
+)
+
+
+class JamendoIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ licensing\.jamendo\.com/[^/]+|
+ (?:www\.)?jamendo\.com
+ )
+ /track/(?P<id>[0-9]+)(?:/(?P<display_id>[^/?#&]+))?
+ '''
+ _TESTS = [{
+ 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
+ 'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
+ 'info_dict': {
+ 'id': '196219',
+ 'display_id': 'stories-from-emona-i',
+ 'ext': 'flac',
+ # 'title': 'Maya Filipič - Stories from Emona I',
+ 'title': 'Stories from Emona I',
+ # 'artist': 'Maya Filipič',
+ 'track': 'Stories from Emona I',
+ 'duration': 210,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1217438117,
+ 'upload_date': '20080730',
+ 'license': 'by-nc-nd',
+ 'view_count': int,
+ 'like_count': int,
+ 'average_rating': int,
+ 'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'],
+ }
+ }, {
+ 'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
+ 'only_matching': True,
+ }]
+
+ def _call_api(self, resource, resource_id):
+ path = '/api/%ss' % resource
+ rand = compat_str(random.random())
+ return self._download_json(
+ 'https://www.jamendo.com' + path, resource_id, query={
+ 'id[]': resource_id,
+ }, headers={
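+ # looks like an anti-scraping token: '$' + sha1(path + rand) + '*' + rand + '~'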
+ 'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
+ })[0]
+
+ def _real_extract(self, url):
+ track_id, display_id = self._VALID_URL_RE.match(url).groups()
+ # webpage = self._download_webpage(
+ # 'https://www.jamendo.com/track/' + track_id, track_id)
+ # models = self._parse_json(self._html_search_regex(
+ # r"data-bundled-models='([^']+)",
+ # webpage, 'bundled models'), track_id)
+ # track = models['track']['models'][0]
+ track = self._call_api('track', track_id)
+ title = track_name = track['name']
+ # get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
+ # artist = get_model('artist')
+ # artist_name = artist.get('name')
+ # if artist_name:
+ # title = '%s - %s' % (artist_name, title)
+ # album = get_model('album')
+
+ formats = [{
+ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
+ % (sub_domain, track_id, format_id),
+ 'format_id': format_id,
+ 'ext': ext,
+ 'quality': quality,
+ } for quality, (format_id, sub_domain, ext) in enumerate((
+ ('mp31', 'mp3l', 'mp3'),
+ ('mp32', 'mp3d', 'mp3'),
+ ('ogg1', 'ogg', 'ogg'),
+ ('flac', 'flac', 'flac'),
+ ))]
+ self._sort_formats(formats)
+
+ urls = []
+ thumbnails = []
+ for covers in (track.get('cover') or {}).values():
+ for cover_id, cover_url in covers.items():
+ if not cover_url or cover_url in urls:
+ continue
+ urls.append(cover_url)
+ size = int_or_none(cover_id.lstrip('size'))
+ thumbnails.append({
+ 'id': cover_id,
+ 'url': cover_url,
+ 'width': size,
+ 'height': size,
+ })
+
+ tags = []
+ for tag in (track.get('tags') or []):
+ tag_name = tag.get('name')
+ if not tag_name:
+ continue
+ tags.append(tag_name)
+
+ stats = track.get('stats') or {}
+ license = track.get('licenseCC') or []
+
+ return {
+ 'id': track_id,
+ 'display_id': display_id,
+ 'thumbnails': thumbnails,
+ 'title': title,
+ 'description': track.get('description'),
+ 'duration': int_or_none(track.get('duration')),
+ # 'artist': artist_name,
+ 'track': track_name,
+ # 'album': album.get('name'),
+ 'formats': formats,
+ 'license': '-'.join(license) if license else None,
+ 'timestamp': int_or_none(track.get('dateCreated')),
+ 'view_count': int_or_none(stats.get('listenedAll')),
+ 'like_count': int_or_none(stats.get('favorited')),
+ 'average_rating': int_or_none(stats.get('averageNote')),
+ 'tags': tags,
+ }
+
+
+class JamendoAlbumIE(JamendoIE):
+ _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
+ 'info_dict': {
+ 'id': '121486',
+ 'title': 'Duck On Cover',
+ 'description': 'md5:c2920eaeef07d7af5b96d7c64daf1239',
+ },
+ 'playlist': [{
+ 'md5': 'e1a2fcb42bda30dfac990212924149a8',
+ 'info_dict': {
+ 'id': '1032333',
+ 'ext': 'flac',
+ 'title': 'Shearer - Warmachine',
+ 'artist': 'Shearer',
+ 'track': 'Warmachine',
+ 'timestamp': 1368089771,
+ 'upload_date': '20130509',
+ }
+ }, {
+ 'md5': '1f358d7b2f98edfe90fd55dac0799d50',
+ 'info_dict': {
+ 'id': '1032330',
+ 'ext': 'flac',
+ 'title': 'Shearer - Without Your Ghost',
+ 'artist': 'Shearer',
+ 'track': 'Without Your Ghost',
+ 'timestamp': 1368089771,
+ 'upload_date': '20130509',
+ }
+ }],
+ 'params': {
+ 'playlistend': 2
+ }
+ }]
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+ album = self._call_api('album', album_id)
+ album_name = album.get('name')
+
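+ # each track is delegated to JamendoIE; url_transparent entries let the
+ # album name carry over into the track metadata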
+ entries = []
+ for track in (album.get('tracks') or []):
+ track_id = track.get('id')
+ if not track_id:
+ continue
+ track_id = compat_str(track_id)
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': 'https://www.jamendo.com/track/' + track_id,
+ 'ie_key': JamendoIE.ie_key(),
+ 'id': track_id,
+ 'album': album_name,
+ })
+
+ return self.playlist_result(
+ entries, album_id, album_name,
+ clean_html(try_get(album, lambda x: x['description']['en'], compat_str)))
diff --git a/hypervideo_dl/extractor/jeuxvideo.py b/hypervideo_dl/extractor/jeuxvideo.py
new file mode 100644
index 0000000..e9f4ed7
--- /dev/null
+++ b/hypervideo_dl/extractor/jeuxvideo.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class JeuxVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
+
+ _TESTS = [{
+ 'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
+ 'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
+ 'info_dict': {
+ 'id': '114765',
+ 'ext': 'mp4',
+ 'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
+ 'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
+ },
+ }, {
+ 'url': 'http://www.jeuxvideo.com/videos/chroniques/434220/l-histoire-du-jeu-video-la-saturn.htm',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ title = mobj.group(1)
+ webpage = self._download_webpage(url, title)
+ title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
+ config_url = self._html_search_regex(
+ r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"',
+ webpage, 'config URL')
+ config_url = 'http://www.jeuxvideo.com' + config_url
+
+ video_id = self._search_regex(
+ r'id=(\d+)',
+ config_url, 'video ID')
+
+ config = self._download_json(
+ config_url, title, 'Downloading JSON config')
+
+ formats = [{
+ 'url': source['file'],
+ 'format_id': source['label'],
+ 'resolution': source['label'],
+ } for source in reversed(config['sources'])]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': config.get('image'),
+ }
diff --git a/hypervideo_dl/extractor/joj.py b/hypervideo_dl/extractor/joj.py
new file mode 100644
index 0000000..62b28e9
--- /dev/null
+++ b/hypervideo_dl/extractor/joj.py
@@ -0,0 +1,108 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ try_get,
+)
+
+
+class JojIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ joj:|
+ https?://media\.joj\.sk/embed/
+ )
+ (?P<id>[^/?#^]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'info_dict': {
+ 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'ext': 'mp4',
+ 'title': 'NOVÉ BÝVANIE',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 3118,
+ }
+ }, {
+ 'url': 'https://media.joj.sk/embed/9i1cxv',
+ 'only_matching': True,
+ }, {
+ 'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'only_matching': True,
+ }, {
+ 'url': 'joj:9i1cxv',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://media.joj.sk/embed/%s' % video_id, video_id)
+
+ title = self._search_regex(
+ (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'<title>(?P<title>[^<]+)'), webpage, 'title',
+ default=None, group='title') or self._og_search_title(webpage)
+
+ bitrates = self._parse_json(
+ self._search_regex(
+ r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
+ default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+
+ formats = []
+ for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
+ if isinstance(format_url, compat_str):
+ height = self._search_regex(
+ r'(\d+)[pP]\.', format_url, 'height', default=None)
+ formats.append({
+ 'url': format_url,
+ 'format_id': '%sp' % height if height else None,
+ 'height': int_or_none(height),
+ })
+ if not formats:
+ playlist = self._download_xml(
+ 'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
+ video_id)
+ for file_el in playlist.findall('./files/file'):
+ path = file_el.get('path')
+ if not path:
+ continue
+ format_id = file_el.get('id') or file_el.get('label')
+ formats.append({
+ 'url': 'http://n16.joj.sk/storage/%s' % path.replace(
+ 'dat/', '', 1),
+ 'format_id': format_id,
+ 'height': int_or_none(self._search_regex(
+ r'(\d+)[pP]', format_id or path, 'height',
+ default=None)),
+ })
+ self._sort_formats(formats)
+
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ duration = int_or_none(self._search_regex(
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/jove.py b/hypervideo_dl/extractor/jove.py
new file mode 100644
index 0000000..27e0e37
--- /dev/null
+++ b/hypervideo_dl/extractor/jove.py
@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ unified_strdate,
+)
+
+
+class JoveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
+ _CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
+ _TESTS = [
+ {
+ 'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
+ 'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
+ 'info_dict': {
+ 'id': '2744',
+ 'ext': 'mp4',
+ 'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
+ 'description': 'md5:015dd4509649c0908bc27f049e0262c6',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'upload_date': '20110523',
+ }
+ },
+ {
+ 'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
+ 'md5': '914aeb356f416811d911996434811beb',
+ 'info_dict': {
+ 'id': '51796',
+ 'ext': 'mp4',
+ 'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
+ 'description': 'md5:35ff029261900583970c4023b70f1dc9',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'upload_date': '20140802',
+ }
+ },
+
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ chapters_id = self._html_search_regex(
+ r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')
+
+ chapters_xml = self._download_xml(
+ self._CHAPTERS_URL.format(video_id=chapters_id),
+ video_id, note='Downloading chapters XML',
+ errnote='Failed to download chapters XML')
+
+ video_url = chapters_xml.attrib.get('video')
+ if not video_url:
+ raise ExtractorError('Failed to get the video URL')
+
+ title = self._html_search_meta('citation_title', webpage, 'title')
+ thumbnail = self._og_search_thumbnail(webpage)
+ description = self._html_search_regex(
+ r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
+ webpage, 'description', fatal=False)
+ publish_date = unified_strdate(self._html_search_meta(
+ 'citation_publication_date', webpage, 'publish date', fatal=False))
+ # the regex may not match (fatal=False yields None), so avoid bare int()
+ comment_count = int_or_none(self._html_search_regex(
+ r'<meta name="num_comments" content="(\d+) Comments?"',
+ webpage, 'comment count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ 'upload_date': publish_date,
+ 'comment_count': comment_count,
+ }
diff --git a/hypervideo_dl/extractor/jwplatform.py b/hypervideo_dl/extractor/jwplatform.py
new file mode 100644
index 0000000..c34b5f5
--- /dev/null
+++ b/hypervideo_dl/extractor/jwplatform.py
@@ -0,0 +1,46 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unsmuggle_url
+
+
+class JWPlatformIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+ _TESTS = [{
+ 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
+ 'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
+ 'info_dict': {
+ 'id': 'nPripu9l',
+ 'ext': 'mov',
+ 'title': 'Big Buck Bunny Trailer',
+ 'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
+ 'upload_date': '20081127',
+ 'timestamp': 1227796140,
+ }
+ }, {
+ 'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = JWPlatformIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})',
+ webpage)
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
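+ # embedding extractors may smuggle in a list of geo countries to bypass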
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
+ video_id = self._match_id(url)
+ json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
+ return self._parse_jwplayer_data(json_data, video_id)
diff --git a/hypervideo_dl/extractor/kakao.py b/hypervideo_dl/extractor/kakao.py
new file mode 100644
index 0000000..31ce7a8
--- /dev/null
+++ b/hypervideo_dl/extractor/kakao.py
@@ -0,0 +1,143 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ strip_or_none,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+)
+
+
+class KakaoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P<id>\d+|[^?#&]+@my)'
+ _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/'
+
+ _TESTS = [{
+ 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
+ 'md5': '702b2fbdeb51ad82f5c904e8c0766340',
+ 'info_dict': {
+ 'id': '301965083',
+ 'ext': 'mp4',
+ 'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
+ 'uploader_id': '2671005',
+ 'uploader': '그랑그랑이',
+ 'timestamp': 1488160199,
+ 'upload_date': '20170227',
+ }
+ }, {
+ 'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180',
+ 'md5': 'a8917742069a4dd442516b86e7d66529',
+ 'info_dict': {
+ 'id': '300103180',
+ 'ext': 'mp4',
+ 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
+ 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
+ 'uploader_id': '2653210',
+ 'uploader': '쇼! 음악중심',
+ 'timestamp': 1485684628,
+ 'upload_date': '20170129',
+ }
+ }, {
+ # geo restricted
+ 'url': 'https://tv.kakao.com/channel/3643855/cliplink/412069491',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ # str.rstrip('@my') would strip any trailing '@', 'm' or 'y' characters;
+ # remove the literal '@my' suffix instead
+ display_id = video_id[:-3] if video_id.endswith('@my') else video_id
+ api_base = self._API_BASE_TMPL % video_id
+
+ player_header = {
+ 'Referer': update_url_query(
+ 'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, {
+ 'service': 'kakao_tv',
+ 'autoplay': '1',
+ 'profile': 'HIGH',
+ 'wmode': 'transparent',
+ })
+ }
+
+ query = {
+ 'player': 'monet_html5',
+ 'referer': url,
+ 'uuid': '',
+ 'service': 'kakao_tv',
+ 'section': '',
+ 'dteType': 'PC',
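+ # '-*' appears to clear the default field set; the names that follow
+ # are whitelisted back in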
+ 'fields': ','.join([
+ '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
+ 'description', 'channelId', 'createTime', 'duration', 'playCount',
+ 'likeCount', 'commentCount', 'tagList', 'channel', 'name', 'thumbnailUrl',
+ 'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
+ }
+
+ impress = self._download_json(
+ api_base + 'impress', display_id, 'Downloading video info',
+ query=query, headers=player_header)
+
+ clip_link = impress['clipLink']
+ clip = clip_link['clip']
+
+ title = clip.get('title') or clip_link.get('displayTitle')
+
+ query.update({
+ 'fields': '-*,code,message,url',
+ 'tid': impress.get('tid') or '',
+ })
+
+ formats = []
+ for fmt in (clip.get('videoOutputList') or []):
+ try:
+ profile_name = fmt['profile']
+ if profile_name == 'AUDIO':
+ continue
+ query['profile'] = profile_name
+ try:
+ fmt_url_json = self._download_json(
+ api_base + 'raw/videolocation', display_id,
+ 'Downloading video URL for profile %s' % profile_name,
+ query=query, headers=player_header)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ resp = self._parse_json(e.cause.read().decode(), video_id)
+ if resp.get('code') == 'GeoBlocked':
+ self.raise_geo_restricted()
+ continue
+
+ fmt_url = fmt_url_json['url']
+ formats.append({
+ 'url': fmt_url,
+ 'format_id': profile_name,
+ 'width': int_or_none(fmt.get('width')),
+ 'height': int_or_none(fmt.get('height')),
+ 'format_note': fmt.get('label'),
+ 'filesize': int_or_none(fmt.get('filesize')),
+ 'tbr': int_or_none(fmt.get('kbps')),
+ })
+ except KeyError:
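+ # skip outputs that lack a mandatory field such as 'profile' or 'url'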
+ pass
+ self._sort_formats(formats)
+
+ return {
+ 'id': display_id,
+ 'title': title,
+ 'description': strip_or_none(clip.get('description')),
+ 'uploader': try_get(clip_link, lambda x: x['channel']['name']),
+ 'uploader_id': str_or_none(clip_link.get('channelId')),
+ 'thumbnail': clip.get('thumbnailUrl'),
+ 'timestamp': unified_timestamp(clip_link.get('createTime')),
+ 'duration': int_or_none(clip.get('duration')),
+ 'view_count': int_or_none(clip.get('playCount')),
+ 'like_count': int_or_none(clip.get('likeCount')),
+ 'comment_count': int_or_none(clip.get('commentCount')),
+ 'formats': formats,
+ 'tags': clip.get('tagList'),
+ }
diff --git a/hypervideo_dl/extractor/kaltura.py b/hypervideo_dl/extractor/kaltura.py
new file mode 100644
index 0000000..c731612
--- /dev/null
+++ b/hypervideo_dl/extractor/kaltura.py
@@ -0,0 +1,377 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import base64
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urlparse,
+ compat_parse_qs,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ unsmuggle_url,
+ smuggle_url,
+)
+
+
+class KalturaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
+ https?://
+ (?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
+ (?:
+ (?:
+ # flash player
+ index\.php/(?:kwidget|extwidget/preview)|
+ # html5 player
+ html5/html5lib/[^/]+/mwEmbedFrame\.php
+ )
+ )(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
+ )
+ '''
+ _SERVICE_URL = 'http://cdnapi.kaltura.com'
+ _SERVICE_BASE = '/api_v3/index.php'
+ # See https://github.com/kaltura/server/blob/master/plugins/content/caption/base/lib/model/enums/CaptionType.php
+ _CAPTION_TYPES = {
+ 1: 'srt',
+ 2: 'ttml',
+ 3: 'vtt',
+ }
+ _TESTS = [
+ {
+ 'url': 'kaltura:269692:1_1jc2y3e4',
+ 'md5': '3adcbdb3dcc02d647539e53f284ba171',
+ 'info_dict': {
+ 'id': '1_1jc2y3e4',
+ 'ext': 'mp4',
+ 'title': 'Straight from the Heart',
+ 'upload_date': '20131219',
+ 'uploader_id': 'mlundberg@wolfgangsvault.com',
+ 'description': 'The Allman Brothers Band, 12/16/1981',
+ 'thumbnail': 're:^https?://.*/thumbnail/.*',
+ 'timestamp': int,
+ },
+ },
+ {
+ 'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342',
+ 'only_matching': True,
+ },
+ {
+ # video with subtitles
+ 'url': 'kaltura:111032:1_cw786r8q',
+ 'only_matching': True,
+ },
+ {
+ # video with ttml subtitles (no fileExt)
+ 'url': 'kaltura:1926081:0_l5ye1133',
+ 'info_dict': {
+ 'id': '0_l5ye1133',
+ 'ext': 'mp4',
+ 'title': 'What Can You Do With Python?',
+ 'upload_date': '20160221',
+ 'uploader_id': 'stork',
+ 'thumbnail': 're:^https?://.*/thumbnail/.*',
+ 'timestamp': int,
+ 'subtitles': {
+ 'en': [{
+ 'ext': 'ttml',
+ }],
+ },
+ },
+ 'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
+ 'only_matching': True,
+ },
+ {
+ # unavailable source format
+ 'url': 'kaltura:513551:1_66x4rg7o',
+ 'only_matching': True,
+ }
+ ]
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = KalturaIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
+ def _extract_urls(webpage):
+ # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
+ finditer = (
+ list(re.finditer(
+ r"""(?xs)
+ kWidget\.(?:thumb)?[Ee]mbed\(
+ \{.*?
+ (?P<q1>['"])wid(?P=q1)\s*:\s*
+ (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
+ (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
+ (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
+ """, webpage))
+ or list(re.finditer(
+ r'''(?xs)
+ (?P<q1>["'])
+ (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
+ (?P=q1).*?
+ (?:
+ (?:
+ entry_?[Ii]d|
+ (?P<q2>["'])entry_?[Ii]d(?P=q2)
+ )\s*:\s*|
+ \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
+ )
+ (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
+ ''', webpage))
+ or list(re.finditer(
+ r'''(?xs)
+ <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
+ (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
+ (?:(?!(?P=q1)).)*
+ [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
+ (?:(?!(?P=q1)).)*
+ (?P=q1)
+ ''', webpage))
+ )
+ urls = []
+ for mobj in finditer:
+ embed_info = mobj.groupdict()
+ for k, v in embed_info.items():
+ if v:
+ embed_info[k] = v.strip()
+ url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
+ escaped_pid = re.escape(embed_info['partner_id'])
+ service_mobj = re.search(
+ r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
+ webpage)
+ if service_mobj:
+ url = smuggle_url(url, {'service_url': service_mobj.group('id')})
+ urls.append(url)
+ return urls
+
+ def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
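+ # Kaltura multirequest: actions[0] carries the shared parameters and
+ # each following action's parameters are flattened into 'N:key' keys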
+ params = actions[0]
+ if len(actions) > 1:
+ for i, a in enumerate(actions[1:], start=1):
+ for k, v in a.items():
+ params['%d:%s' % (i, k)] = v
+
+ data = self._download_json(
+ (service_url or self._SERVICE_URL) + self._SERVICE_BASE,
+ video_id, query=params, *args, **kwargs)
+
+ status = data if len(actions) == 1 else data[0]
+ if status.get('objectType') == 'KalturaAPIException':
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, status['message']))
+
+ return data
+
+ def _get_video_info(self, video_id, partner_id, service_url=None):
+ actions = [
+ {
+ 'action': 'null',
+ 'apiVersion': '3.1.5',
+ 'clientTag': 'kdp:v3.8.5',
+ 'format': 1, # JSON, 2 = XML, 3 = PHP
+ 'service': 'multirequest',
+ },
+ {
+ 'expiry': 86400,
+ 'service': 'session',
+ 'action': 'startWidgetSession',
+ 'widgetId': '_%s' % partner_id,
+ },
+ {
+ 'action': 'get',
+ 'entryId': video_id,
+ 'service': 'baseentry',
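+ # '{1:result:ks}' is a multirequest back-reference: reuse the session
+ # key (ks) returned by request 1 (startWidgetSession)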
+ 'ks': '{1:result:ks}',
+ 'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
+ 'responseProfile:type': 1,
+ },
+ {
+ 'action': 'getbyentryid',
+ 'entryId': video_id,
+ 'service': 'flavorAsset',
+ 'ks': '{1:result:ks}',
+ },
+ {
+ 'action': 'list',
+ 'filter:entryIdEqual': video_id,
+ 'service': 'caption_captionasset',
+ 'ks': '{1:result:ks}',
+ },
+ ]
+ return self._kaltura_api_call(
+ video_id, actions, service_url, note='Downloading video info JSON')
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ mobj = re.match(self._VALID_URL, url)
+ partner_id, entry_id = mobj.group('partner_id', 'id')
+ ks = None
+ captions = None
+ if partner_id and entry_id:
+ _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
+ else:
+ path, query = mobj.group('path', 'query')
+ if not path and not query:
+ raise ExtractorError('Invalid URL', expected=True)
+ params = {}
+ if query:
+ params = compat_parse_qs(query)
+ if path:
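+ # the embed path encodes parameters as alternating /key/value segments;
+ # wrap each value in a list to match the compat_parse_qs shape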
+ path_segments = path.split('/')
+ params.update(dict(zip(path_segments[::2], [[v] for v in path_segments[1::2]])))
+ if 'wid' in params:
+ partner_id = params['wid'][0][1:]
+ elif 'p' in params:
+ partner_id = params['p'][0]
+ elif 'partner_id' in params:
+ partner_id = params['partner_id'][0]
+ else:
+ raise ExtractorError('Invalid URL', expected=True)
+ if 'entry_id' in params:
+ entry_id = params['entry_id'][0]
+ _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id)
+ elif 'uiconf_id' in params and 'flashvars[referenceId]' in params:
+ reference_id = params['flashvars[referenceId]'][0]
+ webpage = self._download_webpage(url, reference_id)
+ entry_data = self._parse_json(self._search_regex(
+ r'window\.kalturaIframePackageData\s*=\s*({.*});',
+ webpage, 'kalturaIframePackageData'),
+ reference_id)['entryResult']
+ info, flavor_assets = entry_data['meta'], entry_data['contextData']['flavorAssets']
+ entry_id = info['id']
+ # Unfortunately, the data returned in kalturaIframePackageData lacks
+ # captions, so try requesting the complete data with the regular
+ # approach now that the entry_id is known
+ try:
+ _, info, flavor_assets, captions = self._get_video_info(
+ entry_id, partner_id)
+ except ExtractorError:
+ # Regular scenario failed but we already have everything
+ # extracted apart from captions and can process at least
+ # with this
+ pass
+ else:
+ raise ExtractorError('Invalid URL', expected=True)
+ ks = params.get('flashvars[ks]', [None])[0]
+
+ source_url = smuggled_data.get('source_url')
+ if source_url:
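+ # the signed URL carries the embedding page's origin (scheme://host),
+ # base64-encoded, as its 'referrer' parameter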
+ referrer = base64.b64encode(
+ '://'.join(compat_urlparse.urlparse(source_url)[:2])
+ .encode('utf-8')).decode('utf-8')
+ else:
+ referrer = None
+
+ def sign_url(unsigned_url):
+ if ks:
+ unsigned_url += '/ks/%s' % ks
+ if referrer:
+ unsigned_url += '?referrer=%s' % referrer
+ return unsigned_url
+
+ data_url = info['dataUrl']
+ if '/flvclipper/' in data_url:
+ data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
+
+ formats = []
+ for f in flavor_assets:
+ # Continue if asset is not ready
+ if f.get('status') != 2:
+ continue
+ # Original format that's not available (e.g. kaltura:1926081:0_c03e1b5g)
+ # skip for now.
+ if f.get('fileExt') == 'chun':
+ continue
+ # DRM-protected video, cannot be decrypted
+ if f.get('fileExt') == 'wvm':
+ continue
+ if not f.get('fileExt'):
+ # QT indicates QuickTime; some videos have broken fileExt
+ if f.get('containerFormat') == 'qt':
+ f['fileExt'] = 'mov'
+ else:
+ f['fileExt'] = 'mp4'
+ video_url = sign_url(
+ '%s/flavorId/%s' % (data_url, f['id']))
+ format_id = '%(fileExt)s-%(bitrate)s' % f
+ # Source format may not be available (e.g. kaltura:513551:1_66x4rg7o)
+ if f.get('isOriginal') is True and not self._is_valid_url(
+ video_url, entry_id, format_id):
+ continue
+ # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
+ # -f mp4-56)
+ vcodec = 'none' if 'videoCodecId' not in f and f.get(
+ 'frameRate') == 0 else f.get('videoCodecId')
+ formats.append({
+ 'format_id': format_id,
+ 'ext': f.get('fileExt'),
+ 'tbr': int_or_none(f['bitrate']),
+ 'fps': int_or_none(f.get('frameRate')),
+ 'filesize_approx': int_or_none(f.get('size'), invscale=1024),
+ 'container': f.get('containerFormat'),
+ 'vcodec': vcodec,
+ 'height': int_or_none(f.get('height')),
+ 'width': int_or_none(f.get('width')),
+ 'url': video_url,
+ })
+ if '/playManifest/' in data_url:
+ m3u8_url = sign_url(data_url.replace(
+ 'format/url', 'format/applehttp'))
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, entry_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ if captions:
+ for caption in captions.get('objects', []):
+ # Continue if caption is not ready
+ if caption.get('status') != 2:
+ continue
+ if not caption.get('id'):
+ continue
+ caption_format = int_or_none(caption.get('format'))
+ subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({
+ 'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']),
+ 'ext': caption.get('fileExt') or self._CAPTION_TYPES.get(caption_format) or 'ttml',
+ })
+
+ return {
+ 'id': entry_id,
+ 'title': info['name'],
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': clean_html(info.get('description')),
+ 'thumbnail': info.get('thumbnailUrl'),
+ 'duration': info.get('duration'),
+ 'timestamp': info.get('createdAt'),
+ 'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
+ 'view_count': info.get('plays'),
+ }
diff --git a/hypervideo_dl/extractor/kankan.py b/hypervideo_dl/extractor/kankan.py
new file mode 100644
index 0000000..a677ff4
--- /dev/null
+++ b/hypervideo_dl/extractor/kankan.py
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+import re
+import hashlib
+
+from .common import InfoExtractor
+
+_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
+
+
+class KankanIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
+
+ _TEST = {
+ 'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
+ 'md5': '29aca1e47ae68fc28804aca89f29507e',
+ 'info_dict': {
+ 'id': '48863',
+ 'ext': 'flv',
+ 'title': 'Ready To Go',
+ },
+ 'skip': 'Only available from China',
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
+ surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
+ gcids = re.findall(r'http://.+?/.+?/(.+?)/', surls)
+ gcid = gcids[-1]
+
+ info_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid
+ video_info_page = self._download_webpage(
+ info_url, video_id, 'Downloading video url info')
+ ip = self._search_regex(r'ip:"(.+?)"', video_info_page, 'video url ip')
+ path = self._search_regex(r'path:"(.+?)"', video_info_page, 'video url path')
+ param1 = self._search_regex(r'param1:(\d+)', video_info_page, 'param1')
+ param2 = self._search_regex(r'param2:(\d+)', video_info_page, 'param2')
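+ # 'xl_mp43651' is presumably a fixed salt baked into the Kankan player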
+ key = _md5('xl_mp43651' + param1 + param2)
+ video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ }
diff --git a/hypervideo_dl/extractor/karaoketv.py b/hypervideo_dl/extractor/karaoketv.py
new file mode 100644
index 0000000..bfccf89
--- /dev/null
+++ b/hypervideo_dl/extractor/karaoketv.py
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class KaraoketvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?karaoketv\.co\.il/[^/]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F',
+ 'info_dict': {
+ 'id': '58356',
+ 'ext': 'flv',
+ 'title': 'קריוקי של איזון',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ api_page_url = self._search_regex(
+ r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.karaoke\.co\.il/api_play\.php\?.+?)\1',
+ webpage, 'API play URL', group='url')
+
+ api_page = self._download_webpage(api_page_url, video_id)
+ video_cdn_url = self._search_regex(
+ r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.video-cdn\.com/embed/iframe/.+?)\1',
+ api_page, 'video cdn URL', group='url')
+
+ video_cdn = self._download_webpage(video_cdn_url, video_id)
+ play_path = self._parse_json(
+ self._search_regex(
+ r'var\s+options\s*=\s*({.+?});', video_cdn, 'options'),
+ video_id)['clip']['url']
+
+ settings = self._parse_json(
+ self._search_regex(
+ r'var\s+settings\s*=\s*({.+?});', video_cdn, 'servers', default='{}'),
+ video_id, fatal=False) or {}
+
+ servers = settings.get('servers')
+ if not servers or not isinstance(servers, list):
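+ # fall back to the default CDN endpoint hard-coded here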
+ servers = ('wowzail.video-cdn.com:80/vodcdn', )
+
+ formats = [{
+ 'url': 'rtmp://%s' % server if not server.startswith('rtmp') else server,
+ 'play_path': play_path,
+ 'app': 'vodcdn',
+ 'page_url': video_cdn_url,
+ 'player_url': 'http://www.video-cdn.com/assets/flowplayer/flowplayer.commercial-3.2.18.swf',
+ 'rtmp_real_time': True,
+ 'ext': 'flv',
+ } for server in servers]
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/karrierevideos.py b/hypervideo_dl/extractor/karrierevideos.py
new file mode 100644
index 0000000..7b291e0
--- /dev/null
+++ b/hypervideo_dl/extractor/karrierevideos.py
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ fix_xml_ampersands,
+ float_or_none,
+ xpath_with_ns,
+ xpath_text,
+)
+
+
+class KarriereVideosIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
+ 'info_dict': {
+ 'id': '32c91',
+ 'ext': 'flv',
+ 'title': 'AltenpflegerIn',
+ 'description': 'md5:dbadd1259fde2159a9b28667cb664ae2',
+ 'thumbnail': r're:^http://.*\.png',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ # broken ampersands
+ 'url': 'http://www.karrierevideos.at/orientierung/vaeterkarenz-und-neue-chancen-fuer-muetter-baby-was-nun',
+ 'info_dict': {
+ 'id': '5sniu',
+ 'ext': 'flv',
+ 'title': 'Väterkarenz und neue Chancen für Mütter - "Baby - was nun?"',
+ 'description': 'md5:97092c6ad1fd7d38e9d6a5fdeb2bcc33',
+ 'thumbnail': r're:^http://.*\.png',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = (self._html_search_meta('title', webpage, default=None)
+ or self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))
+
+ video_id = self._search_regex(
+ r'/config/video/(.+?)\.xml', webpage, 'video id')
+ # Server returns malformed headers
+ # Force Accept-Encoding: * to prevent gzipped results
+ playlist = self._download_xml(
+ 'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
+ video_id, transform_source=fix_xml_ampersands,
+ headers={'Accept-Encoding': '*'})
+
+ NS_MAP = {
+ 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
+ }
+
+ def ns(path):
+ return xpath_with_ns(path, NS_MAP)
+
+ item = playlist.find('./tracklist/item')
+ video_file = xpath_text(
+ item, ns('./jwplayer:file'), 'video url', fatal=True)
+ streamer = xpath_text(
+ item, ns('./jwplayer:streamer'), 'streamer', fatal=True)
+
+ uploader = xpath_text(
+ item, ns('./jwplayer:author'), 'uploader')
+ duration = float_or_none(
+ xpath_text(item, ns('./jwplayer:duration'), 'duration'))
+
+ description = self._html_search_regex(
+ r'(?s)<div class="leadtext">(.+?)</div>',
+ webpage, 'description')
+
+ thumbnail = self._html_search_meta(
+ 'thumbnail', webpage, 'thumbnail')
+ if thumbnail:
+ thumbnail = compat_urlparse.urljoin(url, thumbnail)
+
+ return {
+ 'id': video_id,
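+ # prefer plain RTMP over rtmpt (RTMP tunneled over HTTP)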
+ 'url': streamer.replace('rtmpt', 'rtmp'),
+ 'play_path': 'mp4:%s' % video_file,
+ 'ext': 'flv',
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'duration': duration,
+ }
diff --git a/hypervideo_dl/extractor/keezmovies.py b/hypervideo_dl/extractor/keezmovies.py
new file mode 100644
index 0000000..c3eb74c
--- /dev/null
+++ b/hypervideo_dl/extractor/keezmovies.py
@@ -0,0 +1,133 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..aes import aes_decrypt_text
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ str_to_int,
+ strip_or_none,
+ url_or_none,
+)
+
+
+class KeezMoviesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.keezmovies.com/video/arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money-18070681',
+ 'md5': '2ac69cdb882055f71d82db4311732a1a',
+ 'info_dict': {
+ 'id': '18070681',
+ 'display_id': 'arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money',
+ 'ext': 'mp4',
+ 'title': 'Arab wife want it so bad I see she thirsty and has tiny money.',
+ 'thumbnail': None,
+ 'view_count': int,
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'http://www.keezmovies.com/video/18070681',
+ 'only_matching': True,
+ }]
+
+ def _extract_info(self, url, fatal=True):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = (mobj.group('display_id')
+ if 'display_id' in mobj.groupdict()
+ else None) or mobj.group('id')
+
+ webpage = self._download_webpage(
+ url, display_id, headers={'Cookie': 'age_verified=1'})
+
+ formats = []
+ format_urls = set()
+
+ title = None
+ thumbnail = None
+ duration = None
+ encrypted = False
+
+ def extract_format(format_url, height=None):
+ format_url = url_or_none(format_url)
+ if not format_url or not format_url.startswith(('http', '//')):
+ return
+ if format_url in format_urls:
+ return
+ format_urls.add(format_url)
+ tbr = int_or_none(self._search_regex(
+ r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None))
+ if not height:
+ height = int_or_none(self._search_regex(
+ r'[/_](\d+)[pP][/_]', format_url, 'height', default=None))
+ if encrypted:
+ # decrypt the URL currently being processed; the enclosing
+ # video_url is not assigned yet when this helper first runs
+ format_url = aes_decrypt_text(
+ format_url, title, 32).decode('utf-8')
+ formats.append({
+ 'url': format_url,
+ 'format_id': '%dp' % height if height else None,
+ 'height': height,
+ 'tbr': tbr,
+ })
+
+ flashvars = self._parse_json(
+ self._search_regex(
+ r'flashvars\s*=\s*({.+?});', webpage,
+ 'flashvars', default='{}'),
+ display_id, fatal=False)
+
+ if flashvars:
+ title = flashvars.get('video_title')
+ thumbnail = flashvars.get('image_url')
+ duration = int_or_none(flashvars.get('video_duration'))
+ encrypted = flashvars.get('encrypted') is True
+ for key, value in flashvars.items():
+ mobj = re.search(r'quality_(\d+)[pP]', key)
+ if mobj:
+ extract_format(value, int(mobj.group(1)))
+ video_url = flashvars.get('video_url')
+ if video_url and determine_ext(video_url, None):
+ extract_format(video_url)
+
+ video_url = self._html_search_regex(
+ r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1',
+ webpage, 'video url', default=None, group='url')
+ if video_url:
+ extract_format(compat_urllib_parse_unquote(video_url))
+
+ if not formats:
+ if 'title="This video is no longer available"' in webpage:
+ raise ExtractorError(
+ 'Video %s is no longer available' % video_id, expected=True)
+
+ try:
+ self._sort_formats(formats)
+ except ExtractorError:
+ if fatal:
+ raise
+
+ if not title:
+ title = self._html_search_regex(
+ r'<h1[^>]*>([^<]+)', webpage, 'title')
+
+ return webpage, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': strip_or_none(title),
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': 18,
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ webpage, info = self._extract_info(url, fatal=False)
+ if not info['formats']:
+ return self.url_result(url, 'Generic')
+ info['view_count'] = str_to_int(self._search_regex(
+ r'<b>([\d,.]+)</b> Views?', webpage, 'view count', fatal=False))
+ return info
diff --git a/hypervideo_dl/extractor/ketnet.py b/hypervideo_dl/extractor/ketnet.py
new file mode 100644
index 0000000..e0599d0
--- /dev/null
+++ b/hypervideo_dl/extractor/ketnet.py
@@ -0,0 +1,72 @@
+from __future__ import unicode_literals
+
+from .canvas import CanvasIE
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class KetnetIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.ketnet.be/kijken/n/nachtwacht/3/nachtwacht-s3a1-de-greystook',
+ 'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
+ 'info_dict': {
+ 'id': 'pbs-pub-aef8b526-115e-4006-aa24-e59ff6c6ef6f$vid-ddb815bf-c8e7-467b-8879-6bad7a32cebd',
+ 'ext': 'mp4',
+ 'title': 'Nachtwacht - Reeks 3: Aflevering 1',
+ 'description': 'De Nachtwacht krijgt te maken met een parasiet',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1468.02,
+ 'timestamp': 1609225200,
+ 'upload_date': '20201229',
+ 'series': 'Nachtwacht',
+ 'season': 'Reeks 3',
+ 'episode': 'De Greystook',
+ 'episode_number': 1,
+ },
+ 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
+ }, {
+ 'url': 'https://www.ketnet.be/themas/karrewiet/jaaroverzicht-20200/karrewiet-het-jaar-van-black-mamba',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://senior-bff.ketnet.be/graphql', display_id, query={
+ 'query': '''{
+ video(id: "content/ketnet/nl/%s.model.json") {
+ description
+ episodeNr
+ imageUrl
+ mediaReference
+ programTitle
+ publicationDate
+ seasonTitle
+ subtitleVideodetail
+ titleVideodetail
+ }
+}''' % display_id,
+ })['data']['video']
+
+ mz_id = compat_urllib_parse_unquote(video['mediaReference'])
+
+ return {
+ '_type': 'url_transparent',
+ 'id': mz_id,
+ 'title': video['titleVideodetail'],
+ 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/' + mz_id,
+ 'thumbnail': video.get('imageUrl'),
+ 'description': video.get('description'),
+ 'timestamp': parse_iso8601(video.get('publicationDate')),
+ 'series': video.get('programTitle'),
+ 'season': video.get('seasonTitle'),
+ 'episode': video.get('subtitleVideodetail'),
+ 'episode_number': int_or_none(video.get('episodeNr')),
+ 'ie_key': CanvasIE.ie_key(),
+ }
diff --git a/hypervideo_dl/extractor/khanacademy.py b/hypervideo_dl/extractor/khanacademy.py
new file mode 100644
index 0000000..87e5203
--- /dev/null
+++ b/hypervideo_dl/extractor/khanacademy.py
@@ -0,0 +1,107 @@
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class KhanAcademyBaseIE(InfoExtractor):
+ _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
+
+ def _parse_video(self, video):
+ return {
+ '_type': 'url_transparent',
+ 'url': video['youtubeId'],
+ 'id': video.get('slug'),
+ 'title': video.get('title'),
+ 'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
+ 'duration': int_or_none(video.get('duration')),
+ 'description': video.get('description'),
+ 'ie_key': 'Youtube',
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ component_props = self._parse_json(self._download_json(
+ 'https://www.khanacademy.org/api/internal/graphql',
+ display_id, query={
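+ # opaque persisted-query hash expected by the internal GraphQL
+ # endpoint (presumably precomputed by the Khan Academy frontend)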
+ 'hash': 1604303425,
+ 'variables': json.dumps({
+ 'path': display_id,
+ 'queryParams': '',
+ }),
+ })['data']['contentJson'], display_id)['componentProps']
+ return self._parse_component_props(component_props)
+
+
+class KhanAcademyIE(KhanAcademyBaseIE):
+ IE_NAME = 'khanacademy'
+ _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
+ _TEST = {
+ 'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
+ 'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
+ 'info_dict': {
+ 'id': 'FlIG3TvQCBQ',
+ 'ext': 'mp4',
+ 'title': 'The one-time pad',
+ 'description': 'The perfect cipher',
+ 'duration': 176,
+ 'uploader': 'Brit Cruise',
+ 'uploader_id': 'khanacademy',
+ 'upload_date': '20120411',
+ 'timestamp': 1334170113,
+ 'license': 'cc-by-nc-sa',
+ },
+ 'add_ie': ['Youtube'],
+ }
+
+ def _parse_component_props(self, component_props):
+ video = component_props['tutorialPageData']['contentModel']
+ info = self._parse_video(video)
+ author_names = video.get('authorNames')
+ info.update({
+ 'uploader': ', '.join(author_names) if author_names else None,
+ 'timestamp': parse_iso8601(video.get('dateAdded')),
+ 'license': video.get('kaUserLicense'),
+ })
+ return info
+
+
+class KhanAcademyUnitIE(KhanAcademyBaseIE):
+ IE_NAME = 'khanacademy:unit'
+ _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
+ _TEST = {
+ 'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
+ 'info_dict': {
+ 'id': 'cryptography',
+ 'title': 'Cryptography',
+ 'description': 'How have humans protected their secret messages through history? What has changed today?',
+ },
+ 'playlist_mincount': 31,
+ }
+
+ def _parse_component_props(self, component_props):
+ curation = component_props['curation']
+
+ entries = []
+ tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []
+ for tutorial_number, tutorial in enumerate(tutorials, 1):
+ chapter_info = {
+ 'chapter': tutorial.get('title'),
+ 'chapter_number': tutorial_number,
+ 'chapter_id': tutorial.get('id'),
+ }
+ for content_item in (tutorial.get('contentItems') or []):
+ if content_item.get('kind') == 'Video':
+ info = self._parse_video(content_item)
+ info.update(chapter_info)
+ entries.append(info)
+
+ return self.playlist_result(
+ entries, curation.get('unit'), curation.get('title'),
+ curation.get('description'))
diff --git a/hypervideo_dl/extractor/kickstarter.py b/hypervideo_dl/extractor/kickstarter.py
new file mode 100644
index 0000000..d4da8f4
--- /dev/null
+++ b/hypervideo_dl/extractor/kickstarter.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class KickStarterIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?kickstarter\.com/projects/(?P<id>[^/]*)/.*'
+ _TESTS = [{
+ 'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant/description',
+ 'md5': 'c81addca81327ffa66c642b5d8b08cab',
+ 'info_dict': {
+ 'id': '1404461844',
+ 'ext': 'mp4',
+ 'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
+ 'description': (
+ 'A unique motocross documentary that examines the '
+ 'life and mind of one of sports most elite athletes: Josh Grant.'
+ ),
+ },
+ }, {
+ 'note': 'Embedded video (not using the native kickstarter video service)',
+ 'url': 'https://www.kickstarter.com/projects/597507018/pebble-e-paper-watch-for-iphone-and-android/posts/659178',
+ 'info_dict': {
+ 'id': '78704821',
+ 'ext': 'mp4',
+ 'uploader_id': 'pebble',
+ 'uploader': 'Pebble Technology',
+ 'title': 'Pebble iOS Notifications',
+ },
+ 'add_ie': ['Vimeo'],
+ }, {
+ 'url': 'https://www.kickstarter.com/projects/1420158244/power-drive-2000/widget/video.html',
+ 'info_dict': {
+ 'id': '1420158244',
+ 'ext': 'mp4',
+ 'title': 'Power Drive 2000',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<title>\s*(.*?)(?:\s*&mdash;\s*Kickstarter)?\s*</title>',
+ webpage, 'title')
+ video_url = self._search_regex(
+ r'data-video-url="(.*?)"',
+ webpage, 'video URL', default=None)
+ if video_url is None: # No native kickstarter, look for embedded videos
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'Generic',
+ 'url': smuggle_url(url, {'to_generic': True}),
+ 'title': title,
+ }
+
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+ if thumbnail is None:
+ thumbnail = self._html_search_regex(
+ r'<img[^>]+class="[^"]+\s*poster\s*[^"]+"[^>]+src="([^"]+)"',
+ webpage, 'thumbnail image', fatal=False)
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/kinja.py b/hypervideo_dl/extractor/kinja.py
new file mode 100644
index 0000000..79e3026
--- /dev/null
+++ b/hypervideo_dl/extractor/kinja.py
@@ -0,0 +1,221 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+ unescapeHTML,
+ urljoin,
+)
+
+
+class KinjaEmbedIE(InfoExtractor):
+ IE_NAME = 'kinja:embed'
+ _DOMAIN_REGEX = r'''(?:[^.]+\.)?
+ (?:
+ avclub|
+ clickhole|
+ deadspin|
+ gizmodo|
+ jalopnik|
+ jezebel|
+ kinja|
+ kotaku|
+ lifehacker|
+ splinternews|
+ the(?:inventory|onion|root|takeout)
+ )\.com'''
+ _COMMON_REGEX = r'''/
+ (?:
+ ajax/inset|
+ embed/video
+ )/iframe\?.*?\bid='''
+ _VALID_URL = r'''(?x)https?://%s%s
+ (?P<type>
+ fb|
+ imgur|
+ instagram|
+ jwp(?:layer)?-video|
+ kinjavideo|
+ mcp|
+ megaphone|
+ ooyala|
+ soundcloud(?:-playlist)?|
+ tumblr-post|
+ twitch-stream|
+ twitter|
+ ustream-channel|
+ vimeo|
+ vine|
+ youtube-(?:list|video)
+ )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
+ _TESTS = [{
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
+ 'only_matching': True,
+ }]
+ _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
+ _PROVIDER_MAP = {
+ 'fb': ('facebook.com/video.php?v=', 'Facebook'),
+ 'imgur': ('imgur.com/', 'Imgur'),
+ 'instagram': ('instagram.com/p/', 'Instagram'),
+ 'jwplayer-video': _JWPLATFORM_PROVIDER,
+ 'jwp-video': _JWPLATFORM_PROVIDER,
+ 'megaphone': ('player.megaphone.fm/', 'Generic'),
+ 'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'),
+ 'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
+ 'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
+ 'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
+ 'twitch-stream': ('twitch.tv/', 'TwitchStream'),
+ 'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
+ 'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
+ 'vimeo': ('vimeo.com/', 'Vimeo'),
+ 'vine': ('vine.co/v/', 'Vine'),
+ 'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
+ 'youtube-video': ('youtube.com/embed/', 'Youtube'),
+ }
+
+ @staticmethod
+ def _extract_urls(webpage, url):
+ return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer(
+ r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX),
+ webpage)]
+
+ def _real_extract(self, url):
+ video_type, video_id = re.match(self._VALID_URL, url).groups()
+
+ provider = self._PROVIDER_MAP.get(video_type)
+ if provider:
+ video_id = compat_urllib_parse_unquote(video_id)
+ if video_type == 'tumblr-post':
+ video_id, blog = video_id.split('-', 1)
+ result_url = provider[0] % (blog, video_id)
+ elif video_type == 'youtube-list':
+ video_id, playlist_id = video_id.split('/')
+ result_url = provider[0] % (video_id, playlist_id)
+ else:
+ if video_type == 'ooyala':
+ video_id = video_id.split('/')[0]
+ result_url = provider[0] + video_id
+ return self.url_result('http://' + result_url, provider[1])
+
+ if video_type == 'kinjavideo':
+ data = self._download_json(
+ 'https://kinja.com/api/core/video/views/videoById',
+ video_id, query={'videoId': video_id})['data']
+ title = data['title']
+
+ formats = []
+ for k in ('signedPlaylist', 'streaming'):
+ m3u8_url = data.get(k + 'Url')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ thumbnail = None
+ poster = data.get('poster') or {}
+ poster_id = poster.get('id')
+ if poster_id:
+ thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(data.get('description')),
+ 'formats': formats,
+ 'tags': data.get('tags'),
+ 'timestamp': int_or_none(try_get(
+ data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
+ 'thumbnail': thumbnail,
+ 'uploader': data.get('network'),
+ }
+ else:
+ video_data = self._download_json(
+ 'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
+ video_id)['videoMetadata']
+ iptc = video_data['photoVideoMetadataIPTC']
+ title = iptc['title']['en']
+ fmg = video_data.get('photoVideoMetadata_fmg') or {}
+ tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
+ data = self._download_json(
+ tvss_domain + '/api/v3/video-auth/url-signature-tokens',
+ video_id, query={'mcpids': video_id})['data'][0]
+ formats = []
+
+ rendition_url = data.get('renditionUrl')
+ if rendition_url:
+ formats = self._extract_m3u8_formats(
+ rendition_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+
+ fallback_rendition_url = data.get('fallbackRenditionUrl')
+ if fallback_rendition_url:
+ formats.append({
+ 'format_id': 'fallback',
+ 'tbr': int_or_none(self._search_regex(
+ r'_(\d+)\.mp4', fallback_rendition_url,
+ 'bitrate', default=None)),
+ 'url': fallback_rendition_url,
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
+ 'uploader': fmg.get('network'),
+ 'duration': int_or_none(iptc.get('fileDuration')),
+ 'formats': formats,
+ 'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
+ 'timestamp': parse_iso8601(iptc.get('dateReleased')),
+ }
diff --git a/hypervideo_dl/extractor/kinopoisk.py b/hypervideo_dl/extractor/kinopoisk.py
new file mode 100644
index 0000000..9e8d01f
--- /dev/null
+++ b/hypervideo_dl/extractor/kinopoisk.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ dict_get,
+ int_or_none,
+)
+
+
+class KinoPoiskIE(InfoExtractor):
+ _GEO_COUNTRIES = ['RU']
+ _VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.kinopoisk.ru/film/81041/watch/',
+ 'md5': '4f71c80baea10dfa54a837a46111d326',
+ 'info_dict': {
+ 'id': '81041',
+ 'ext': 'mp4',
+ 'title': 'Алеша попович и тугарин змей',
+ 'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 4533,
+ 'age_limit': 12,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }, {
+ 'url': 'https://www.kinopoisk.ru/film/81041',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
+ query={'kpId': video_id})
+
+ data = self._parse_json(
+ self._search_regex(
+ r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
+ webpage, 'data'),
+ video_id)['models']
+
+ film = data['filmStatus']
+ title = film.get('title') or film['originalTitle']
+
+ formats = self._extract_m3u8_formats(
+ data['playlistEntity']['uri'], video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ description = dict_get(
+ film, ('descriptscription', 'description',
+ 'shortDescriptscription', 'shortDescription'))
+ thumbnail = film.get('coverUrl') or film.get('posterUrl')
+ duration = int_or_none(film.get('duration'))
+ age_limit = int_or_none(film.get('restrictionAge'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/konserthusetplay.py b/hypervideo_dl/extractor/konserthusetplay.py
new file mode 100644
index 0000000..dd42bb2
--- /dev/null
+++ b/hypervideo_dl/extractor/konserthusetplay.py
@@ -0,0 +1,124 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ url_or_none,
+)
+
+
+class KonserthusetPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:konserthusetplay|rspoplay)\.se/\?.*\bm=(?P<id>[^&]+)'
+ _TESTS = [{
+ 'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
+ 'md5': 'e3fd47bf44e864bd23c08e487abe1967',
+ 'info_dict': {
+ 'id': 'CKDDnlCY-dhWAAqiMERd-A',
+ 'ext': 'mp4',
+ 'title': 'Orkesterns instrument: Valthornen',
+ 'description': 'md5:f10e1f0030202020396a4d712d2fa827',
+ 'thumbnail': 're:^https?://.*$',
+ 'duration': 398.76,
+ },
+ }, {
+ 'url': 'http://rspoplay.se/?m=elWuEH34SMKvaO4wO_cHBw',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ e = self._search_regex(
+ r'https?://csp\.picsearch\.com/rest\?.*\be=(.+?)[&"\']', webpage, 'e')
+
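+ # the REST endpoint wraps its JSON in a callback, so trim the response
+ # to the outermost braces before parsing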
+ rest = self._download_json(
+ 'http://csp.picsearch.com/rest?e=%s&containerId=mediaplayer&i=object' % e,
+ video_id, transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
+
+ media = rest['media']
+ player_config = media['playerconfig']
+ playlist = player_config['playlist']
+
+ source = next(f for f in playlist if f.get('bitrates') or f.get('provider'))
+
+ FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'
+
+ formats = []
+
+ m3u8_url = source.get('url')
+ if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ fallback_url = source.get('fallbackUrl')
+ fallback_format_id = None
+ if fallback_url:
+ fallback_format_id = self._search_regex(
+ FORMAT_ID_REGEX, fallback_url, 'format id', default=None)
+
+ connection_url = (player_config.get('rtmp', {}).get(
+ 'netConnectionUrl') or player_config.get(
+ 'plugins', {}).get('bwcheck', {}).get('netConnectionUrl'))
+ if connection_url:
+ for f in source['bitrates']:
+ video_url = f.get('url')
+ if not video_url:
+ continue
+ format_id = self._search_regex(
+ FORMAT_ID_REGEX, video_url, 'format id', default=None)
+ f_common = {
+ 'vbr': int_or_none(f.get('bitrate')),
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ }
+ f = f_common.copy()
+ f.update({
+ 'url': connection_url,
+ 'play_path': video_url,
+ 'format_id': 'rtmp-%s' % format_id if format_id else 'rtmp',
+ 'ext': 'flv',
+ })
+ formats.append(f)
+ if format_id and format_id == fallback_format_id:
+ f = f_common.copy()
+ f.update({
+ 'url': fallback_url,
+ 'format_id': 'http-%s' % format_id if format_id else 'http',
+ })
+ formats.append(f)
+
+ if not formats and fallback_url:
+ formats.append({
+ 'url': fallback_url,
+ })
+
+ self._sort_formats(formats)
+
+ title = player_config.get('title') or media['title']
+ description = player_config.get('mediaInfo', {}).get('description')
+ thumbnail = media.get('image')
+ duration = float_or_none(media.get('duration'), 1000)
+
+ subtitles = {}
+ captions = source.get('captionsAvailableLanguages')
+ if isinstance(captions, dict):
+ for lang, subtitle_url in captions.items():
+ subtitle_url = url_or_none(subtitle_url)
+ if lang != 'none' and subtitle_url:
+ subtitles.setdefault(lang, []).append({'url': subtitle_url})
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/krasview.py b/hypervideo_dl/extractor/krasview.py
new file mode 100644
index 0000000..d27d052
--- /dev/null
+++ b/hypervideo_dl/extractor/krasview.py
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ js_to_json,
+)
+
+
+class KrasViewIE(InfoExtractor):
+ IE_DESC = 'Красвью'
+ _VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://krasview.ru/video/512228',
+ 'md5': '3b91003cf85fc5db277870c8ebd98eae',
+ 'info_dict': {
+ 'id': '512228',
+ 'ext': 'mp4',
+ 'title': 'Снег, лёд, заносы',
+ 'description': 'Снято в городе Нягань, в Ханты-Мансийском автономном округе.',
+ 'duration': 27,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ 'params': {
+ 'skip_download': 'Not accessible from Travis CI server',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ flashvars = json.loads(js_to_json(self._search_regex(
+ r'video_Init\(({.+?})', webpage, 'flashvars')))
+
+ video_url = flashvars['url']
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
+ duration = int_or_none(flashvars.get('duration'))
+ width = int_or_none(self._og_search_property(
+ 'video:width', webpage, 'video width', default=None))
+ height = int_or_none(self._og_search_property(
+ 'video:height', webpage, 'video height', default=None))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'width': width,
+ 'height': height,
+ }
diff --git a/hypervideo_dl/extractor/ku6.py b/hypervideo_dl/extractor/ku6.py
new file mode 100644
index 0000000..a574408
--- /dev/null
+++ b/hypervideo_dl/extractor/ku6.py
@@ -0,0 +1,32 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class Ku6IE(InfoExtractor):
+ _VALID_URL = r'https?://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html'
+ _TEST = {
+ 'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html',
+ 'md5': '01203549b9efbb45f4b87d55bdea1ed1',
+ 'info_dict': {
+ 'id': 'JG-8yS14xzBr4bCn1pu0xw',
+ 'ext': 'f4v',
+ 'title': 'techniques test',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h1 title=.*>(.*?)</h1>', webpage, 'title')
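+        # fetchVideo4Player returns JSON whose data.f field is the direct media URL.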
+        data_url = 'http://v.ku6.com/fetchVideo4Player/%s.html' % video_id
+        json_data = self._download_json(data_url, video_id)
+        download_url = json_data['data']['f']
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': download_url,
+ }
diff --git a/hypervideo_dl/extractor/kusi.py b/hypervideo_dl/extractor/kusi.py
new file mode 100644
index 0000000..9833d35
--- /dev/null
+++ b/hypervideo_dl/extractor/kusi.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote_plus
+from ..utils import (
+ int_or_none,
+ float_or_none,
+ timeconvert,
+ update_url_query,
+ xpath_text,
+)
+
+
+class KUSIIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
+ _TESTS = [{
+ 'url': 'http://www.kusi.com/story/32849881/turko-files-refused-to-help-it-aint-right',
+ 'md5': '4e76ce8e53660ce9697d06c0ba6fc47d',
+ 'info_dict': {
+ 'id': '12689020',
+ 'ext': 'mp4',
+ 'title': "Turko Files: Refused to Help, It Ain't Right!",
+ 'duration': 223.586,
+ 'upload_date': '20160826',
+ 'timestamp': 1472233118,
+ 'thumbnail': r're:^https?://.*\.jpg$'
+ },
+ }, {
+ 'url': 'http://kusi.com/video?clipId=12203019',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ clip_id = mobj.group('clipId')
+ video_id = clip_id or mobj.group('path')
+
+ webpage = self._download_webpage(url, video_id)
+
+ if clip_id is None:
+ video_id = clip_id = self._html_search_regex(
+ r'"clipId"\s*,\s*"(\d+)"', webpage, 'clip id')
+
+ affiliate_id = self._search_regex(
+ r'affiliateId\s*:\s*\'([^\']+)\'', webpage, 'affiliate id')
+
+ # See __Packages/worldnow/model/GalleryModel.as of WNGallery.swf
+ xml_url = update_url_query('http://www.kusi.com/build.asp', {
+ 'buildtype': 'buildfeaturexmlrequest',
+ 'featureType': 'Clip',
+ 'featureid': clip_id,
+ 'affiliateno': affiliate_id,
+ 'clientgroupid': '1',
+ 'rnd': int(round(random.random() * 1000000)),
+ })
+
+ doc = self._download_xml(xml_url, video_id)
+
+ video_title = xpath_text(doc, 'HEADLINE', fatal=True)
+ duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
+ description = xpath_text(doc, 'ABSTRACT')
+ thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
+ creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
+
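+        # Renditions are listed as media:content entries inside a media:group
+        # (MRSS namespace).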
+ quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
+ formats = []
+ for quality in quality_options:
+ formats.append({
+ 'url': compat_urllib_parse_unquote_plus(quality.attrib['url']),
+ 'height': int_or_none(quality.attrib.get('height')),
+ 'width': int_or_none(quality.attrib.get('width')),
+ 'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_title,
+ 'description': description,
+ 'duration': duration,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'timestamp': creation_time,
+ }
diff --git a/hypervideo_dl/extractor/kuwo.py b/hypervideo_dl/extractor/kuwo.py
new file mode 100644
index 0000000..cc5b2a1
--- /dev/null
+++ b/hypervideo_dl/extractor/kuwo.py
@@ -0,0 +1,352 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ get_element_by_id,
+ clean_html,
+ ExtractorError,
+ InAdvancePagedList,
+ remove_start,
+)
+
+
+class KuwoBaseIE(InfoExtractor):
+ _FORMATS = [
+ {'format': 'ape', 'ext': 'ape', 'preference': 100},
+ {'format': 'mp3-320', 'ext': 'mp3', 'br': '320kmp3', 'abr': 320, 'preference': 80},
+ {'format': 'mp3-192', 'ext': 'mp3', 'br': '192kmp3', 'abr': 192, 'preference': 70},
+ {'format': 'mp3-128', 'ext': 'mp3', 'br': '128kmp3', 'abr': 128, 'preference': 60},
+ {'format': 'wma', 'ext': 'wma', 'preference': 20},
+ {'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10}
+ ]
+
+ def _get_formats(self, song_id, tolerate_ip_deny=False):
+ formats = []
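+        # anti.s maps a song id plus desired format/bitrate to a direct download
+        # URL, or returns the literal string 'IPDeny' when the region is blocked.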
+ for file_format in self._FORMATS:
+ query = {
+ 'format': file_format['ext'],
+ 'br': file_format.get('br', ''),
+ 'rid': 'MUSIC_%s' % song_id,
+ 'type': 'convert_url',
+ 'response': 'url'
+ }
+
+ song_url = self._download_webpage(
+ 'http://antiserver.kuwo.cn/anti.s',
+ song_id, note='Download %s url info' % file_format['format'],
+ query=query, headers=self.geo_verification_headers(),
+ )
+
+ if song_url == 'IPDeny' and not tolerate_ip_deny:
+ raise ExtractorError('This song is blocked in this region', expected=True)
+
+            if song_url.startswith(('http://', 'https://')):
+ formats.append({
+ 'url': song_url,
+ 'format_id': file_format['format'],
+ 'format': file_format['format'],
+ 'preference': file_format['preference'],
+ 'abr': file_format.get('abr'),
+ })
+
+ return formats
+
+
+class KuwoIE(KuwoBaseIE):
+ IE_NAME = 'kuwo:song'
+ IE_DESC = '酷我音乐'
+ _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/yinyue/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.kuwo.cn/yinyue/635632/',
+ 'info_dict': {
+ 'id': '635632',
+ 'ext': 'ape',
+ 'title': '爱我别走',
+ 'creator': '张震岳',
+ 'upload_date': '20080122',
+ 'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c'
+ },
+ 'skip': 'this song has been offline because of copyright issues',
+ }, {
+ 'url': 'http://www.kuwo.cn/yinyue/6446136/',
+ 'info_dict': {
+ 'id': '6446136',
+ 'ext': 'mp3',
+ 'title': '心',
+ 'description': 'md5:5d0e947b242c35dc0eb1d2fce9fbf02c',
+ 'creator': 'IU',
+ 'upload_date': '20150518',
+ },
+ 'params': {
+ 'format': 'mp3-320',
+ },
+ }, {
+ 'url': 'http://www.kuwo.cn/yinyue/3197154?catalog=yueku2016',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ song_id = self._match_id(url)
+ webpage, urlh = self._download_webpage_handle(
+ url, song_id, note='Download song detail info',
+ errnote='Unable to get song detail info')
+ if song_id not in urlh.geturl() or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
+ raise ExtractorError('this song has been offline because of copyright issues', expected=True)
+
+ song_name = self._html_search_regex(
+ r'<p[^>]+id="lrcName">([^<]+)</p>', webpage, 'song name')
+ singer_name = remove_start(self._html_search_regex(
+ r'<a[^>]+href="http://www\.kuwo\.cn/artist/content\?name=([^"]+)">',
+ webpage, 'singer name', fatal=False), '歌手')
+ lrc_content = clean_html(get_element_by_id('lrcContent', webpage))
+ if lrc_content == '暂无': # indicates no lyrics
+ lrc_content = None
+
+ formats = self._get_formats(song_id)
+ self._sort_formats(formats)
+
+ album_id = self._html_search_regex(
+ r'<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
+ webpage, 'album id', fatal=False)
+
+ publish_time = None
+ if album_id is not None:
+ album_info_page = self._download_webpage(
+ 'http://www.kuwo.cn/album/%s/' % album_id, song_id,
+ note='Download album detail info',
+ errnote='Unable to get album detail info')
+
+ publish_time = self._html_search_regex(
+ r'发行时间:(\d{4}-\d{2}-\d{2})', album_info_page,
+ 'publish time', fatal=False)
+ if publish_time:
+ publish_time = publish_time.replace('-', '')
+
+ return {
+ 'id': song_id,
+ 'title': song_name,
+ 'creator': singer_name,
+ 'upload_date': publish_time,
+ 'description': lrc_content,
+ 'formats': formats,
+ }
+
+
+class KuwoAlbumIE(InfoExtractor):
+ IE_NAME = 'kuwo:album'
+ IE_DESC = '酷我音乐 - 专辑'
+ _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/album/(?P<id>\d+?)/'
+ _TEST = {
+ 'url': 'http://www.kuwo.cn/album/502294/',
+ 'info_dict': {
+ 'id': '502294',
+ 'title': 'Made\xa0Series\xa0《M》',
+ 'description': 'md5:d463f0d8a0ff3c3ea3d6ed7452a9483f',
+ },
+ 'playlist_count': 2,
+ }
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, album_id, note='Download album info',
+ errnote='Unable to get album info')
+
+ album_name = self._html_search_regex(
+ r'<div[^>]+class="comm"[^<]+<h1[^>]+title="([^"]+)"', webpage,
+ 'album name')
+ album_intro = remove_start(
+ clean_html(get_element_by_id('intro', webpage)),
+ '%s简介:' % album_name)
+
+ entries = [
+ self.url_result(song_url, 'Kuwo') for song_url in re.findall(
+ r'<p[^>]+class="listen"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+/)"',
+ webpage)
+ ]
+ return self.playlist_result(entries, album_id, album_name, album_intro)
+
+
+class KuwoChartIE(InfoExtractor):
+ IE_NAME = 'kuwo:chart'
+ IE_DESC = '酷我音乐 - 排行榜'
+    _VALID_URL = r'https?://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+)\.htm'
+ _TEST = {
+ 'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm',
+ 'info_dict': {
+ 'id': '香港中文龙虎榜',
+ },
+ 'playlist_mincount': 7,
+ }
+
+ def _real_extract(self, url):
+ chart_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, chart_id, note='Download chart info',
+ errnote='Unable to get chart info')
+
+ entries = [
+ self.url_result(song_url, 'Kuwo') for song_url in re.findall(
+ r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)', webpage)
+ ]
+ return self.playlist_result(entries, chart_id)
+
+
+class KuwoSingerIE(InfoExtractor):
+ IE_NAME = 'kuwo:singer'
+ IE_DESC = '酷我音乐 - 歌手'
+ _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mingxing/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://www.kuwo.cn/mingxing/bruno+mars/',
+ 'info_dict': {
+ 'id': 'bruno+mars',
+ 'title': 'Bruno\xa0Mars',
+ },
+ 'playlist_mincount': 329,
+ }, {
+ 'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm',
+ 'info_dict': {
+ 'id': 'Ali',
+ 'title': 'Ali',
+ },
+ 'playlist_mincount': 95,
+ 'skip': 'Regularly stalls travis build', # See https://travis-ci.org/ytdl-org/youtube-dl/jobs/78878540
+ }]
+
+ PAGE_SIZE = 15
+
+ def _real_extract(self, url):
+ singer_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, singer_id, note='Download singer info',
+ errnote='Unable to get singer info')
+
+ singer_name = self._html_search_regex(
+ r'<h1>([^<]+)</h1>', webpage, 'singer name')
+
+ artist_id = self._html_search_regex(
+ r'data-artistid="(\d+)"', webpage, 'artist id')
+
+ page_count = int(self._html_search_regex(
+ r'data-page="(\d+)"', webpage, 'page count'))
+
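+        # Song pages are served by an Ajax endpoint; the total page count is
+        # known up front, so InAdvancePagedList fetches pages lazily on demand.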
+ def page_func(page_num):
+ webpage = self._download_webpage(
+ 'http://www.kuwo.cn/artist/contentMusicsAjax',
+ singer_id, note='Download song list page #%d' % (page_num + 1),
+ errnote='Unable to get song list page #%d' % (page_num + 1),
+ query={'artistId': artist_id, 'pn': page_num, 'rn': self.PAGE_SIZE})
+
+ return [
+ self.url_result(compat_urlparse.urljoin(url, song_url), 'Kuwo')
+ for song_url in re.findall(
+ r'<div[^>]+class="name"><a[^>]+href="(/yinyue/\d+)',
+ webpage)
+ ]
+
+ entries = InAdvancePagedList(page_func, page_count, self.PAGE_SIZE)
+
+ return self.playlist_result(entries, singer_id, singer_name)
+
+
+class KuwoCategoryIE(InfoExtractor):
+ IE_NAME = 'kuwo:category'
+ IE_DESC = '酷我音乐 - 分类'
+    _VALID_URL = r'https?://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?)\.htm'
+ _TEST = {
+ 'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm',
+ 'info_dict': {
+ 'id': '86375',
+ 'title': '八十年代精选',
+ 'description': '这些都是属于八十年代的回忆!',
+ },
+ 'playlist_mincount': 24,
+ }
+
+ def _real_extract(self, url):
+ category_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, category_id, note='Download category info',
+ errnote='Unable to get category info')
+
+ category_name = self._html_search_regex(
+ r'<h1[^>]+title="([^<>]+?)">[^<>]+?</h1>', webpage, 'category name')
+
+ category_desc = remove_start(
+ get_element_by_id('intro', webpage).strip(),
+ '%s简介:' % category_name)
+ if category_desc == '暂无':
+ category_desc = None
+
+ jsonm = self._parse_json(self._html_search_regex(
+ r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id)
+
+ entries = [
+ self.url_result('http://www.kuwo.cn/yinyue/%s/' % song['musicrid'], 'Kuwo')
+ for song in jsonm['musiclist']
+ ]
+ return self.playlist_result(entries, category_id, category_name, category_desc)
+
+
+class KuwoMvIE(KuwoBaseIE):
+ IE_NAME = 'kuwo:mv'
+ IE_DESC = '酷我音乐 - MV'
+ _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mv/(?P<id>\d+?)/'
+ _TEST = {
+ 'url': 'http://www.kuwo.cn/mv/6480076/',
+ 'info_dict': {
+ 'id': '6480076',
+ 'ext': 'mp4',
+ 'title': 'My HouseMV',
+ 'creator': '2PM',
+ },
+ # In this video, music URLs (anti.s) are blocked outside China and
+ # USA, while the MV URL (mvurl) is available globally, so force the MV
+ # URL for consistent results in different countries
+ 'params': {
+ 'format': 'mv',
+ },
+ }
+ _FORMATS = KuwoBaseIE._FORMATS + [
+ {'format': 'mkv', 'ext': 'mkv', 'preference': 250},
+ {'format': 'mp4', 'ext': 'mp4', 'preference': 200},
+ ]
+
+ def _real_extract(self, url):
+ song_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, song_id, note='Download mv detail info: %s' % song_id,
+ errnote='Unable to get mv detail info: %s' % song_id)
+
+ mobj = re.search(
+ r'<h1[^>]+title="(?P<song>[^"]+)">[^<]+<span[^>]+title="(?P<singer>[^"]+)"',
+ webpage)
+ if mobj:
+ song_name = mobj.group('song')
+ singer_name = mobj.group('singer')
+ else:
+ raise ExtractorError('Unable to find song or singer names')
+
+ formats = self._get_formats(song_id, tolerate_ip_deny=True)
+
+ mv_url = self._download_webpage(
+ 'http://www.kuwo.cn/yy/st/mvurl?rid=MUSIC_%s' % song_id,
+ song_id, note='Download %s MV URL' % song_id)
+ formats.append({
+ 'url': mv_url,
+ 'format_id': 'mv',
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': song_id,
+ 'title': song_name,
+ 'creator': singer_name,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/la7.py b/hypervideo_dl/extractor/la7.py
new file mode 100644
index 0000000..c3b4ffa
--- /dev/null
+++ b/hypervideo_dl/extractor/la7.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ smuggle_url,
+)
+
+
+class LA7IE(InfoExtractor):
+ IE_NAME = 'la7.it'
+ _VALID_URL = r'''(?x)(https?://)?(?:
+ (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/|
+ tg\.la7\.it/repliche-tgla7\?id=
+ )(?P<id>.+)'''
+
+ _TESTS = [{
+ # 'src' is a plain URL
+ 'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
+ 'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
+ 'info_dict': {
+ 'id': '0_42j6wd36',
+ 'ext': 'mp4',
+ 'title': 'Inc.Cool8',
+ 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
+ 'thumbnail': 're:^https?://.*',
+ 'uploader_id': 'kdla7pillole@iltrovatore.it',
+ 'timestamp': 1443814869,
+ 'upload_date': '20151002',
+ },
+ }, {
+ # 'src' is a dictionary
+ 'url': 'http://tg.la7.it/repliche-tgla7?id=189080',
+ 'md5': '6b0d8888d286e39870208dfeceaf456b',
+ 'info_dict': {
+ 'id': '189080',
+ 'ext': 'mp4',
+ 'title': 'TG LA7',
+ },
+ }, {
+ 'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ player_data = self._parse_json(
+ self._search_regex(
+ [r'(?s)videoParams\s*=\s*({.+?});', r'videoLa7\(({[^;]+})\);'],
+ webpage, 'player data'),
+ video_id, transform_source=js_to_json)
+
+ return {
+ '_type': 'url_transparent',
+ 'url': smuggle_url('kaltura:103:%s' % player_data['vid'], {
+ 'service_url': 'http://nkdam.iltrovatore.it',
+ }),
+ 'id': video_id,
+ 'title': player_data['title'],
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': player_data.get('poster'),
+ 'ie_key': 'Kaltura',
+ }
diff --git a/hypervideo_dl/extractor/laola1tv.py b/hypervideo_dl/extractor/laola1tv.py
new file mode 100644
index 0000000..fa21736
--- /dev/null
+++ b/hypervideo_dl/extractor/laola1tv.py
@@ -0,0 +1,265 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ unified_strdate,
+ urlencode_postdata,
+ xpath_element,
+ xpath_text,
+ update_url_query,
+ js_to_json,
+)
+
+
+class Laola1TvEmbedIE(InfoExtractor):
+ IE_NAME = 'laola1tv:embed'
+ _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)'
+ _TESTS = [{
+ # flashvars.premium = "false";
+ 'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024',
+ 'info_dict': {
+ 'id': '708065',
+ 'ext': 'mp4',
+ 'title': 'MA Long CHN - FAN Zhendong CHN',
+ 'uploader': 'ITTF - International Table Tennis Federation',
+ 'upload_date': '20161211',
+ },
+ }]
+
+ def _extract_token_url(self, stream_access_url, video_id, data):
+ return self._download_json(
+ self._proto_relative_url(stream_access_url, 'https:'), video_id,
+ headers={
+ 'Content-Type': 'application/json',
+ }, data=json.dumps(data).encode())['data']['stream-access'][0]
+
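+    # The token service answers with an XML <token> element; its url/auth pair
+    # is handed to Akamai as the hdnea query parameter.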
+ def _extract_formats(self, token_url, video_id):
+ token_doc = self._download_xml(
+ token_url, video_id, 'Downloading token',
+ headers=self.geo_verification_headers())
+
+ token_attrib = xpath_element(token_doc, './/token').attrib
+
+ if token_attrib['status'] != '0':
+ raise ExtractorError(
+ 'Token error: %s' % token_attrib['comment'], expected=True)
+
+ formats = self._extract_akamai_formats(
+ '%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
+ video_id)
+ self._sort_formats(formats)
+ return formats
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ flash_vars = self._search_regex(
+ r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars')
+
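+        # A flashvar may live inside the flashvars object literal or in a
+        # standalone "flashvars.<name> = ..." assignment; try both in turn.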
+ def get_flashvar(x, *args, **kwargs):
+ flash_var = self._search_regex(
+ r'%s\s*:\s*"([^"]+)"' % x,
+ flash_vars, x, default=None)
+ if not flash_var:
+ flash_var = self._search_regex([
+ r'flashvars\.%s\s*=\s*"([^"]+)"' % x,
+ r'%s\s*=\s*"([^"]+)"' % x],
+ webpage, x, *args, **kwargs)
+ return flash_var
+
+ hd_doc = self._download_xml(
+ 'http://www.laola1.tv/server/hd_video.php', video_id, query={
+ 'play': get_flashvar('streamid'),
+ 'partner': get_flashvar('partnerid'),
+ 'portal': get_flashvar('portalid'),
+ 'lang': get_flashvar('sprache'),
+ 'v5ident': '',
+ })
+
+ _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
+ title = _v('title', fatal=True)
+
+ token_url = None
+ premium = get_flashvar('premium', default=None)
+ if premium:
+ token_url = update_url_query(
+ _v('url', fatal=True), {
+ 'timestamp': get_flashvar('timestamp'),
+ 'auth': get_flashvar('auth'),
+ })
+ else:
+ data_abo = urlencode_postdata(
+ dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(','))))
+ stream_access_url = update_url_query(
+ 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', {
+ 'videoId': _v('id'),
+ 'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'),
+ 'label': _v('label'),
+ 'area': _v('area'),
+ })
+ token_url = self._extract_token_url(stream_access_url, video_id, data_abo)
+
+ formats = self._extract_formats(token_url, video_id)
+
+ categories_str = _v('meta_sports')
+ categories = categories_str.split(',') if categories_str else []
+ is_live = _v('islive') == 'true'
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'upload_date': unified_strdate(_v('time_date')),
+ 'uploader': _v('meta_organisation'),
+ 'categories': categories,
+ 'is_live': is_live,
+ 'formats': formats,
+ }
+
+
+class Laola1TvBaseIE(Laola1TvEmbedIE):
+ def _extract_video(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ if 'Dieser Livestream ist bereits beendet.' in webpage:
+ raise ExtractorError('This live stream has already finished.', expected=True)
+
+ conf = self._parse_json(self._search_regex(
+ r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
+ display_id,
+ transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s)))
+ video_id = conf['videoid']
+
+ config = self._download_json(conf['configUrl'], video_id, query={
+ 'videoid': video_id,
+ 'partnerid': conf['partnerid'],
+ 'language': conf.get('language', ''),
+ 'portal': conf.get('portalid', ''),
+ })
+ error = config.get('error')
+ if error:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ video_data = config['video']
+ title = video_data['title']
+ is_live = video_data.get('isLivestream') and video_data.get('isLive')
+        meta = video_data.get('metaInformation') or {}
+ sports = meta.get('sports')
+ categories = sports.split(',') if sports else []
+
+ token_url = self._extract_token_url(
+ video_data['streamAccess'], video_id,
+ video_data['abo']['required'])
+
+ formats = self._extract_formats(token_url, video_id)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('image'),
+ 'categories': categories,
+ 'formats': formats,
+ 'is_live': is_live,
+ }
+
+
+class Laola1TvIE(Laola1TvBaseIE):
+ IE_NAME = 'laola1tv'
+ _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
+ 'info_dict': {
+ 'id': '227883',
+ 'display_id': 'straubing-tigers-koelner-haie',
+ 'ext': 'flv',
+ 'title': 'Straubing Tigers - Kölner Haie',
+ 'upload_date': '20140912',
+ 'is_live': False,
+ 'categories': ['Eishockey'],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie',
+ 'info_dict': {
+ 'id': '464602',
+ 'display_id': 'straubing-tigers-koelner-haie',
+ 'ext': 'flv',
+ 'title': 'Straubing Tigers - Kölner Haie',
+ 'upload_date': '20160129',
+ 'is_live': False,
+ 'categories': ['Eishockey'],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.laola1.tv/de-de/livestream/2016-03-22-belogorie-belgorod-trentino-diatec-lde',
+ 'info_dict': {
+ 'id': '487850',
+ 'display_id': '2016-03-22-belogorie-belgorod-trentino-diatec-lde',
+ 'ext': 'flv',
+ 'title': 'Belogorie BELGOROD - TRENTINO Diatec',
+ 'upload_date': '20160322',
+ 'uploader': 'CEV - Europäischer Volleyball Verband',
+ 'is_live': True,
+ 'categories': ['Volleyball'],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This live stream has already finished.',
+ }]
+
+ def _real_extract(self, url):
+ return self._extract_video(url)
+
+
+class EHFTVIE(Laola1TvBaseIE):
+ IE_NAME = 'ehftv'
+ _VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761',
+ 'info_dict': {
+ 'id': '1166761',
+ 'display_id': 'paris-saint-germain-handball-pge-vive-kielce',
+ 'ext': 'mp4',
+ 'title': 'Paris Saint-Germain Handball - PGE Vive Kielce',
+ 'is_live': False,
+ 'categories': ['Handball'],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ return self._extract_video(url)
+
+
+class ITTFIE(InfoExtractor):
+ _VALID_URL = r'https?://tv\.ittf\.com/video/[^/]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://tv.ittf.com/video/peng-wang-wei-matsudaira-kenta/951802',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ return self.url_result(
+ update_url_query('https://www.laola1.tv/titanplayer.php', {
+ 'videoid': self._match_id(url),
+ 'type': 'V',
+ 'lang': 'en',
+ 'portal': 'int',
+ 'customer': 1024,
+ }), Laola1TvEmbedIE.ie_key())
diff --git a/hypervideo_dl/extractor/lbry.py b/hypervideo_dl/extractor/lbry.py
new file mode 100644
index 0000000..cfd6b83
--- /dev/null
+++ b/hypervideo_dl/extractor/lbry.py
@@ -0,0 +1,280 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import functools
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_str,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ mimetype2ext,
+ OnDemandPagedList,
+ try_get,
+ urljoin,
+)
+
+
+class LBRYBaseIE(InfoExtractor):
+ _BASE_URL_REGEX = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/'
+ _CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
+ _OPT_CLAIM_ID = '[^:/?#&]+(?::%s)?' % _CLAIM_ID_REGEX
+ _SUPPORTED_STREAM_TYPES = ['video', 'audio']
+
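+    # All metadata is fetched through lbry.tv's JSON-RPC proxy to the LBRY SDK
+    # (the 'resolve', 'get' and 'claim_search' methods are used below).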
+ def _call_api_proxy(self, method, display_id, params, resource):
+ return self._download_json(
+ 'https://api.lbry.tv/api/v1/proxy',
+ display_id, 'Downloading %s JSON metadata' % resource,
+ headers={'Content-Type': 'application/json-rpc'},
+ data=json.dumps({
+ 'method': method,
+ 'params': params,
+ }).encode())['result']
+
+ def _resolve_url(self, url, display_id, resource):
+ return self._call_api_proxy(
+ 'resolve', display_id, {'urls': url}, resource)[url]
+
+ def _permanent_url(self, url, claim_name, claim_id):
+ return urljoin(url, '/%s:%s' % (claim_name, claim_id))
+
+ def _parse_stream(self, stream, url):
+ stream_value = stream.get('value') or {}
+ stream_type = stream_value.get('stream_type')
+ source = stream_value.get('source') or {}
+ media = stream_value.get(stream_type) or {}
+ signing_channel = stream.get('signing_channel') or {}
+ channel_name = signing_channel.get('name')
+ channel_claim_id = signing_channel.get('claim_id')
+ channel_url = None
+ if channel_name and channel_claim_id:
+ channel_url = self._permanent_url(url, channel_name, channel_claim_id)
+
+ info = {
+ 'thumbnail': try_get(stream_value, lambda x: x['thumbnail']['url'], compat_str),
+ 'description': stream_value.get('description'),
+ 'license': stream_value.get('license'),
+ 'timestamp': int_or_none(stream.get('timestamp')),
+ 'release_timestamp': int_or_none(stream_value.get('release_time')),
+ 'tags': stream_value.get('tags'),
+ 'duration': int_or_none(media.get('duration')),
+ 'channel': try_get(signing_channel, lambda x: x['value']['title']),
+ 'channel_id': channel_claim_id,
+ 'channel_url': channel_url,
+ 'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
+ 'filesize': int_or_none(source.get('size')),
+ }
+ if stream_type == 'audio':
+ info['vcodec'] = 'none'
+ else:
+ info.update({
+ 'width': int_or_none(media.get('width')),
+ 'height': int_or_none(media.get('height')),
+ })
+ return info
+
+
+class LBRYIE(LBRYBaseIE):
+ IE_NAME = 'lbry'
+ _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX)
+ _TESTS = [{
+ # Video
+ 'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
+ 'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
+ 'info_dict': {
+ 'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
+ 'ext': 'mp4',
+ 'title': 'First day in LBRY? Start HERE!',
+ 'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
+ 'timestamp': 1595694354,
+ 'upload_date': '20200725',
+ 'release_timestamp': 1595340697,
+ 'release_date': '20200721',
+ 'width': 1280,
+ 'height': 720,
+ }
+ }, {
+ # Audio
+ 'url': 'https://lbry.tv/@LBRYFoundation:0/Episode-1:e',
+ 'md5': 'c94017d3eba9b49ce085a8fad6b98d00',
+ 'info_dict': {
+ 'id': 'e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
+ 'ext': 'mp3',
+ 'title': 'The LBRY Foundation Community Podcast Episode 1 - Introduction, Streaming on LBRY, Transcoding',
+ 'description': 'md5:661ac4f1db09f31728931d7b88807a61',
+ 'timestamp': 1591312601,
+ 'upload_date': '20200604',
+ 'release_timestamp': 1591312421,
+ 'release_date': '20200604',
+ 'tags': list,
+ 'duration': 2570,
+ 'channel': 'The LBRY Foundation',
+ 'channel_id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
+ 'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212',
+ 'vcodec': 'none',
+ }
+ }, {
+ # HLS
+ 'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e',
+ 'md5': 'fc82f45ea54915b1495dd7cb5cc1289f',
+ 'info_dict': {
+ 'id': 'e51671357333fe22ae88aad320bde2f6f96b1410',
+ 'ext': 'mp4',
+ 'title': 'PLANTS I WILL NEVER GROW AGAIN. THE BLACK LIST PLANTS FOR A CANADIAN GARDEN | Gardening in Canada 🍁',
+ 'description': 'md5:9c539c6a03fb843956de61a4d5288d5e',
+ 'timestamp': 1618254123,
+ 'upload_date': '20210412',
+ 'release_timestamp': 1618254002,
+ 'release_date': '20210412',
+ 'tags': list,
+ 'duration': 554,
+ 'channel': 'Gardening In Canada',
+ 'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc',
+ 'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc',
+ 'formats': 'mincount:3',
+ }
+ }, {
+ 'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
+ 'only_matching': True,
+ }, {
+ 'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b",
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/Episode-1:e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/$/embed/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/Episode-1:e7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/@LBRYFoundation/Episode-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
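+        # Web URLs separate claim name and claim id with ':' (or use the
+        # $/embed/<name>/<id> form), while lbry:// URIs use '#'; normalize first.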
+ if display_id.startswith('$/'):
+ display_id = display_id.split('/', 2)[-1].replace('/', ':')
+ else:
+ display_id = display_id.replace(':', '#')
+ display_id = compat_urllib_parse_unquote(display_id)
+ uri = 'lbry://' + display_id
+ result = self._resolve_url(uri, display_id, 'stream')
+ result_value = result['value']
+ if result_value.get('stream_type') not in self._SUPPORTED_STREAM_TYPES:
+ raise ExtractorError('Unsupported URL', expected=True)
+ claim_id = result['claim_id']
+ title = result_value['title']
+ streaming_url = self._call_api_proxy(
+ 'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
+ info = self._parse_stream(result, url)
+ urlh = self._request_webpage(
+ streaming_url, display_id, note='Downloading streaming redirect url info')
+ if determine_ext(urlh.geturl()) == 'm3u8':
+ info['formats'] = self._extract_m3u8_formats(
+ urlh.geturl(), display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ self._sort_formats(info['formats'])
+ else:
+ info['url'] = streaming_url
+ info.update({
+ 'id': claim_id,
+ 'title': title,
+ })
+ return info
+
+
+class LBRYChannelIE(LBRYBaseIE):
+ IE_NAME = 'lbry:channel'
+ _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?#&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
+ _TESTS = [{
+ 'url': 'https://lbry.tv/@LBRYFoundation:0',
+ 'info_dict': {
+ 'id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
+ 'title': 'The LBRY Foundation',
+ 'description': 'Channel for the LBRY Foundation. Follow for updates and news.',
+ },
+ 'playlist_count': 29,
+ }, {
+ 'url': 'https://lbry.tv/@LBRYFoundation',
+ 'only_matching': True,
+ }]
+ _PAGE_SIZE = 50
+
+ def _fetch_page(self, claim_id, url, params, page):
+ page += 1
+ page_params = {
+ 'channel_ids': [claim_id],
+ 'claim_type': 'stream',
+ 'no_totals': True,
+ 'page': page,
+ 'page_size': self._PAGE_SIZE,
+ }
+ page_params.update(params)
+ result = self._call_api_proxy(
+ 'claim_search', claim_id, page_params, 'page %d' % page)
+ for item in (result.get('items') or []):
+ stream_claim_name = item.get('name')
+ stream_claim_id = item.get('claim_id')
+ if not (stream_claim_name and stream_claim_id):
+ continue
+
+ info = self._parse_stream(item, url)
+ info.update({
+ '_type': 'url',
+ 'id': stream_claim_id,
+ 'title': try_get(item, lambda x: x['value']['title']),
+ 'url': self._permanent_url(url, stream_claim_name, stream_claim_id),
+ })
+ yield info
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url).replace(':', '#')
+ result = self._resolve_url(
+ 'lbry://' + display_id, display_id, 'channel')
+ claim_id = result['claim_id']
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ content = qs.get('content', [None])[0]
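+        # Mirror the website's query string (order, content type, duration,
+        # language) onto the claim_search parameters.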
+ params = {
+ 'fee_amount': qs.get('fee_amount', ['>=0'])[0],
+ 'order_by': {
+ 'new': ['release_time'],
+ 'top': ['effective_amount'],
+ 'trending': ['trending_group', 'trending_mixed'],
+ }[qs.get('order', ['new'])[0]],
+ 'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
+ }
+ duration = qs.get('duration', [None])[0]
+ if duration:
+ params['duration'] = {
+ 'long': '>=1200',
+ 'short': '<=240',
+ }[duration]
+ language = qs.get('language', ['all'])[0]
+ if language != 'all':
+ languages = [language]
+ if language == 'en':
+ languages.append('none')
+ params['any_languages'] = languages
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, claim_id, url, params),
+ self._PAGE_SIZE)
+ result_value = result.get('value') or {}
+ return self.playlist_result(
+ entries, claim_id, result_value.get('title'),
+ result_value.get('description'))
diff --git a/hypervideo_dl/extractor/lci.py b/hypervideo_dl/extractor/lci.py
new file mode 100644
index 0000000..920872f
--- /dev/null
+++ b/hypervideo_dl/extractor/lci.py
@@ -0,0 +1,26 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class LCIIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?lci\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
+ _TEST = {
+ 'url': 'http://www.lci.fr/international/etats-unis-a-j-62-hillary-clinton-reste-sans-voix-2001679.html',
+ 'md5': '2fdb2538b884d4d695f9bd2bde137e6c',
+ 'info_dict': {
+ 'id': '13244802',
+ 'ext': 'mp4',
+ 'title': 'Hillary Clinton et sa quinte de toux, en plein meeting',
+ 'description': 'md5:a4363e3a960860132f8124b62f4a01c9',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ wat_id = self._search_regex(
+ (r'data-watid=[\'"](\d+)', r'idwat["\']?\s*:\s*["\']?(\d+)'),
+ webpage, 'wat id')
+ return self.url_result('wat:' + wat_id, 'Wat', wat_id)
diff --git a/hypervideo_dl/extractor/lcp.py b/hypervideo_dl/extractor/lcp.py
new file mode 100644
index 0000000..ade27a9
--- /dev/null
+++ b/hypervideo_dl/extractor/lcp.py
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .arkena import ArkenaIE
+
+
+class LcpPlayIE(ArkenaIE):
+ _VALID_URL = r'https?://play\.lcp\.fr/embed/(?P<id>[^/]+)/(?P<account_id>[^/]+)/[^/]+/[^/]+'
+ _TESTS = [{
+ 'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
+ 'md5': 'b8bd9298542929c06c1c15788b1f277a',
+ 'info_dict': {
+ 'id': '327336',
+ 'ext': 'mp4',
+ 'title': '327336',
+ 'timestamp': 1456391602,
+ 'upload_date': '20160225',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+
+class LcpIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?lcp\.fr/(?:[^/]+/)*(?P<id>[^/]+)'
+
+ _TESTS = [{
+ # arkena embed
+ 'url': 'http://www.lcp.fr/la-politique-en-video/schwartzenberg-prg-preconise-francois-hollande-de-participer-une-primaire',
+ 'md5': 'b8bd9298542929c06c1c15788b1f277a',
+ 'info_dict': {
+ 'id': 'd56d03e9',
+ 'ext': 'mp4',
+ 'title': 'Schwartzenberg (PRG) préconise à François Hollande de participer à une primaire à gauche',
+ 'description': 'md5:96ad55009548da9dea19f4120c6c16a8',
+ 'timestamp': 1456488895,
+ 'upload_date': '20160226',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # dailymotion live stream
+ 'url': 'http://www.lcp.fr/le-direct',
+ 'info_dict': {
+ 'id': 'xji3qy',
+ 'ext': 'mp4',
+ 'title': 'La Chaine Parlementaire (LCP), Live TNT',
+ 'description': 'md5:5c69593f2de0f38bd9a949f2c95e870b',
+ 'uploader': 'LCP',
+ 'uploader_id': 'xbz33d',
+ 'timestamp': 1308923058,
+ 'upload_date': '20110624',
+ },
+ 'params': {
+ # m3u8 live stream
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.lcp.fr/emissions/277792-les-volontaires',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ play_url = self._search_regex(
+ r'<iframe[^>]+src=(["\'])(?P<url>%s?(?:(?!\1).)*)\1' % LcpPlayIE._VALID_URL,
+ webpage, 'play iframe', default=None, group='url')
+
+ if not play_url:
+ return self.url_result(url, 'Generic')
+
+ title = self._og_search_title(webpage, default=None) or self._html_search_meta(
+ 'twitter:title', webpage, fatal=True)
+ description = self._html_search_meta(
+ ('description', 'twitter:description'), webpage)
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': LcpPlayIE.ie_key(),
+ 'url': play_url,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ }
diff --git a/hypervideo_dl/extractor/lecture2go.py b/hypervideo_dl/extractor/lecture2go.py
new file mode 100644
index 0000000..81b5d41
--- /dev/null
+++ b/hypervideo_dl/extractor/lecture2go.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ determine_protocol,
+ parse_duration,
+ int_or_none,
+)
+
+
+class Lecture2GoIE(InfoExtractor):
+ _VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473',
+ 'md5': 'ac02b570883020d208d405d5a3fd2f7f',
+ 'info_dict': {
+ 'id': '17473',
+ 'ext': 'mp4',
+ 'title': '2 - Endliche Automaten und reguläre Sprachen',
+ 'creator': 'Frank Heitmann',
+ 'duration': 5220,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title')
+
+ formats = []
+        for format_url in set(re.findall(r'var\s+playerUri\d+\s*=\s*"([^"]+)"', webpage)):
+            ext = determine_ext(format_url)
+            protocol = determine_protocol({'url': format_url})
+            if ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(format_url, video_id, f4m_id='hds'))
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(format_url, video_id, ext='mp4', m3u8_id='hls'))
+            else:
+                if protocol == 'rtmp':
+                    continue  # XXX: currently broken
+                formats.append({
+                    'format_id': protocol,
+                    'url': format_url,
+                })
+
+ self._sort_formats(formats)
+
+ creator = self._html_search_regex(
+ r'<div[^>]+id="description">([^<]+)</div>', webpage, 'creator', fatal=False)
+ duration = parse_duration(self._html_search_regex(
+ r'Duration:\s*</em>\s*<em[^>]*>([^<]+)</em>', webpage, 'duration', fatal=False))
+ view_count = int_or_none(self._html_search_regex(
+ r'Views:\s*</em>\s*<em[^>]+>(\d+)</em>', webpage, 'view count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'creator': creator,
+ 'duration': duration,
+ 'view_count': view_count,
+ }
diff --git a/hypervideo_dl/extractor/lecturio.py b/hypervideo_dl/extractor/lecturio.py
new file mode 100644
index 0000000..1b2dcef
--- /dev/null
+++ b/hypervideo_dl/extractor/lecturio.py
@@ -0,0 +1,243 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class LecturioBaseIE(InfoExtractor):
+ _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/'
+ _LOGIN_URL = 'https://app.lecturio.com/en/login'
+ _NETRC_MACHINE = 'lecturio'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ # Sets some cookies
+ _, urlh = self._download_webpage_handle(
+ self._LOGIN_URL, None, 'Downloading login popup')
+
+ def is_logged(url_handle):
+ return self._LOGIN_URL not in url_handle.geturl()
+
+ # Already logged in
+ if is_logged(urlh):
+ return
+
+ login_form = {
+ 'signin[email]': username,
+ 'signin[password]': password,
+ 'signin[remember]': 'on',
+ }
+
+ response, urlh = self._download_webpage_handle(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form))
+
+ # Logged in successfully
+ if is_logged(urlh):
+ return
+
+ errors = self._html_search_regex(
+ r'(?s)<ul[^>]+class=["\']error_list[^>]+>(.+?)</ul>', response,
+ 'errors', default=None)
+ if errors:
+ raise ExtractorError('Unable to login: %s' % errors, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class LecturioIE(LecturioBaseIE):
+ _VALID_URL = r'''(?x)
+ https://
+ (?:
+ app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
+ (?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
+ 'md5': '9a42cf1d8282a6311bf7211bbde26fde',
+ 'info_dict': {
+ 'id': '39634',
+ 'ext': 'mp4',
+ 'title': 'Important Concepts and Terms — Introduction to Microbiology',
+ },
+ 'skip': 'Requires lecturio account credentials',
+ }, {
+ 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
+ 'only_matching': True,
+ }]
+
+ _CC_LANGS = {
+ 'Arabic': 'ar',
+ 'Bulgarian': 'bg',
+ 'German': 'de',
+ 'English': 'en',
+ 'Spanish': 'es',
+ 'Persian': 'fa',
+ 'French': 'fr',
+ 'Japanese': 'ja',
+ 'Polish': 'pl',
+ 'Pashto': 'ps',
+ 'Russian': 'ru',
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ nt = mobj.group('nt') or mobj.group('nt_de')
+ lecture_id = mobj.group('id')
+ display_id = nt or lecture_id
+ api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json'
+ video = self._download_json(
+ self._API_BASE_URL + api_path, display_id)
+ title = video['title'].strip()
+ if not lecture_id:
+ pid = video.get('productId') or video.get('uid')
+ if pid:
+ spid = pid.split('_')
+ if spid and len(spid) == 2:
+ lecture_id = spid[1]
+
+ formats = []
+ for format_ in video['content']['media']:
+ if not isinstance(format_, dict):
+ continue
+ file_ = format_.get('file')
+ if not file_:
+ continue
+ ext = determine_ext(file_)
+ if ext == 'smil':
+ # smil contains only broken RTMP formats anyway
+ continue
+ file_url = url_or_none(file_)
+ if not file_url:
+ continue
+ label = str_or_none(format_.get('label'))
+ filesize = int_or_none(format_.get('fileSize'))
+ f = {
+ 'url': file_url,
+ 'format_id': label,
+ 'filesize': float_or_none(filesize, invscale=1000)
+ }
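+            # A label such as '720p (...)' carries the height; the parenthesized
+            # part is used as the format id.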
+ if label:
+ mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label)
+ if mobj:
+ f.update({
+ 'format_id': mobj.group(2),
+ 'height': int(mobj.group(1)),
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ automatic_captions = {}
+ captions = video.get('captions') or []
+ for cc in captions:
+ cc_url = cc.get('url')
+ if not cc_url:
+ continue
+ cc_label = cc.get('translatedCode')
+ lang = cc.get('languageCode') or self._search_regex(
+ r'/([a-z]{2})_', cc_url, 'lang',
+ default=cc_label.split()[0] if cc_label else 'en')
+ original_lang = self._search_regex(
+ r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang',
+ default=None)
+                sub_dict = (automatic_captions
+                            if (cc_label and 'auto-translated' in cc_label) or original_lang
+                            else subtitles)
+ sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
+ 'url': cc_url,
+ })
+
+ return {
+ 'id': lecture_id or nt,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'automatic_captions': automatic_captions,
+ }
+
+
+class LecturioCourseIE(LecturioBaseIE):
+ _VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))'
+ _TESTS = [{
+ 'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
+ 'info_dict': {
+ 'id': 'microbiology-introduction',
+ 'title': 'Microbiology: Introduction',
+ 'description': 'md5:13da8500c25880c6016ae1e6d78c386a',
+ },
+ 'playlist_count': 45,
+ 'skip': 'Requires lecturio account credentials',
+ }, {
+ 'url': 'https://app.lecturio.com/#/course/c/6434',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ nt, course_id = re.match(self._VALID_URL, url).groups()
+ display_id = nt or course_id
+ api_path = 'courses/' + course_id if course_id else 'course/content/' + nt + '.json'
+ course = self._download_json(
+ self._API_BASE_URL + api_path, display_id)
+ entries = []
+ for lecture in course.get('lectures', []):
+ lecture_id = str_or_none(lecture.get('id'))
+ lecture_url = lecture.get('url')
+ if lecture_url:
+ lecture_url = urljoin(url, lecture_url)
+ else:
+ lecture_url = 'https://app.lecturio.com/#/lecture/c/%s/%s' % (course_id, lecture_id)
+ entries.append(self.url_result(
+ lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
+ return self.playlist_result(
+ entries, display_id, course.get('title'),
+ clean_html(course.get('description')))
+
+
+class LecturioDeCourseIE(LecturioBaseIE):
+ _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
+ _TEST = {
+ 'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
+ for mobj in re.finditer(
+ r'(?s)<td[^>]+\bdata-lecture-id=["\'](?P<id>\d+).+?\bhref=(["\'])(?P<url>(?:(?!\2).)+\.vortrag)\b[^>]+>',
+ webpage):
+ lecture_url = urljoin(url, mobj.group('url'))
+ lecture_id = mobj.group('id')
+ entries.append(self.url_result(
+ lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
+
+ title = self._search_regex(
+ r'<h1[^>]*>([^<]+)', webpage, 'title', default=None)
+
+ return self.playlist_result(entries, display_id, title)
diff --git a/hypervideo_dl/extractor/leeco.py b/hypervideo_dl/extractor/leeco.py
new file mode 100644
index 0000000..7dc0ad7
--- /dev/null
+++ b/hypervideo_dl/extractor/leeco.py
@@ -0,0 +1,368 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+import hashlib
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_ord,
+ compat_str,
+ compat_urllib_parse_urlencode,
+)
+from ..utils import (
+ determine_ext,
+ encode_data_uri,
+ ExtractorError,
+ int_or_none,
+ orderedSet,
+ parse_iso8601,
+ str_or_none,
+ url_basename,
+ urshift,
+)
+
+
+class LeIE(InfoExtractor):
+ IE_DESC = '乐视网'
+ _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html'
+ _GEO_COUNTRIES = ['CN']
+ _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
+
+ _TESTS = [{
+ 'url': 'http://www.le.com/ptv/vplay/22005890.html',
+ 'md5': 'edadcfe5406976f42f9f266057ee5e40',
+ 'info_dict': {
+ 'id': '22005890',
+ 'ext': 'mp4',
+ 'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
+ 'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
+ },
+ 'params': {
+ 'hls_prefer_native': True,
+ },
+ }, {
+ 'url': 'http://www.le.com/ptv/vplay/1415246.html',
+ 'info_dict': {
+ 'id': '1415246',
+ 'ext': 'mp4',
+ 'title': '美人天下01',
+ 'description': 'md5:28942e650e82ed4fcc8e4de919ee854d',
+ },
+ 'params': {
+ 'hls_prefer_native': True,
+ },
+ }, {
+ 'note': 'This video is available only in Mainland China, thus a proxy is needed',
+ 'url': 'http://www.le.com/ptv/vplay/1118082.html',
+ 'md5': '2424c74948a62e5f31988438979c5ad1',
+ 'info_dict': {
+ 'id': '1118082',
+ 'ext': 'mp4',
+ 'title': '与龙共舞 完整版',
+ 'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
+ },
+ 'params': {
+ 'hls_prefer_native': True,
+ },
+ }, {
+ 'url': 'http://sports.le.com/video/25737697.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.lesports.com/match/1023203003.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://sports.le.com/match/1023203003.html',
+ 'only_matching': True,
+ }]
+
+    # ror() and calc_time_key() are reverse-engineered from an embedded swf file in LetvPlayer.swf
+ def ror(self, param1, param2):
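+        # 32-bit rotate right: each iteration shifts param1 right by one bit and
+        # wraps the dropped bit around to bit 31.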
+ _loc3_ = 0
+ while _loc3_ < param2:
+ param1 = urshift(param1, 1) + ((param1 & 1) << 31)
+ _loc3_ += 1
+ return param1
+
+ def calc_time_key(self, param1):
+ _loc2_ = 185025305
+ return self.ror(param1, _loc2_ % 17) ^ _loc2_
+
+ # see M3U8Encryption class in KLetvPlayer.swf
+ @staticmethod
+ def decrypt_m3u8(encrypted_data):
+ if encrypted_data[:5].decode('utf-8').lower() != 'vc_01':
+ return encrypted_data
+ encrypted_data = encrypted_data[5:]
+
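+        # Split every byte into two 4-bit nibbles, rotate the nibble array right
+        # by 11 positions, then reassemble pairs of nibbles into bytes.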
+ _loc4_ = bytearray(2 * len(encrypted_data))
+ for idx, val in enumerate(encrypted_data):
+ b = compat_ord(val)
+ _loc4_[2 * idx] = b // 16
+ _loc4_[2 * idx + 1] = b % 16
+ idx = len(_loc4_) - 11
+ _loc4_ = _loc4_[idx:] + _loc4_[:idx]
+ _loc7_ = bytearray(len(encrypted_data))
+ for i in range(len(encrypted_data)):
+ _loc7_[i] = _loc4_[2 * i] * 16 + _loc4_[2 * i + 1]
+
+ return bytes(_loc7_)
+
+ def _check_errors(self, play_json):
+ # Check for errors
+ playstatus = play_json['msgs']['playstatus']
+ if playstatus['status'] == 0:
+ flag = playstatus['flag']
+ if flag == 1:
+ self.raise_geo_restricted()
+ else:
+ raise ExtractorError('Generic error. flag = %d' % flag, expected=True)
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ page = self._download_webpage(url, media_id)
+
+ play_json_flash = self._download_json(
+ 'http://player-pc.le.com/mms/out/video/playJson',
+ media_id, 'Downloading flash playJson data', query={
+ 'id': media_id,
+ 'platid': 1,
+ 'splatid': 105,
+ 'format': 1,
+ 'source': 1000,
+ 'tkey': self.calc_time_key(int(time.time())),
+ 'domain': 'www.le.com',
+ 'region': 'cn',
+ },
+ headers=self.geo_verification_headers())
+ self._check_errors(play_json_flash)
+
+ def get_flash_urls(media_url, format_id):
+ nodes_data = self._download_json(
+ media_url, media_id,
+ 'Download JSON metadata for format %s' % format_id,
+ query={
+ 'm3v': 1,
+ 'format': 1,
+ 'expect': 3,
+ 'tss': 'ios',
+ })
+
+ req = self._request_webpage(
+ nodes_data['nodelist'][0]['location'], media_id,
+ note='Downloading m3u8 information for format %s' % format_id)
+
+ m3u8_data = self.decrypt_m3u8(req.read())
+
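+            # Serve the decrypted playlist to the HLS downloader via a data: URI,
+            # since the original URL yields obfuscated bytes.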
+ return {
+ 'hls': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
+ }
+
+ extracted_formats = []
+ formats = []
+ playurl = play_json_flash['msgs']['playurl']
+ play_domain = playurl['domain'][0]
+
+        for format_id, format_data in (playurl.get('dispatch') or {}).items():
+ if format_id in extracted_formats:
+ continue
+ extracted_formats.append(format_id)
+
+ media_url = play_domain + format_data[0]
+ for protocol, format_url in get_flash_urls(media_url, format_id).items():
+ f = {
+ 'url': format_url,
+ 'ext': determine_ext(format_data[1]),
+ 'format_id': '%s-%s' % (protocol, format_id),
+ 'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
+ 'quality': int_or_none(format_id),
+ }
+
+ if format_id[-1:] == 'p':
+ f['height'] = int_or_none(format_id[:-1])
+
+ formats.append(f)
+ self._sort_formats(formats, ('height', 'quality', 'format_id'))
+
+ publish_time = parse_iso8601(self._html_search_regex(
+ r'发布时间&nbsp;([^<>]+) ', page, 'publish time', default=None),
+ delimiter=' ', timezone=datetime.timedelta(hours=8))
+ description = self._html_search_meta('description', page, fatal=False)
+
+ return {
+ 'id': media_id,
+ 'formats': formats,
+ 'title': playurl['title'],
+ 'thumbnail': playurl['pic'],
+ 'description': description,
+ 'timestamp': publish_time,
+ }
+
+
+class LePlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://[a-z]+\.le\.com/(?!video)[a-z]+/(?P<id>[a-z0-9_]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.le.com/tv/46177.html',
+ 'info_dict': {
+ 'id': '46177',
+ 'title': '美人天下',
+ 'description': 'md5:395666ff41b44080396e59570dbac01c'
+ },
+ 'playlist_count': 35
+ }, {
+ 'url': 'http://tv.le.com/izt/wuzetian/index.html',
+ 'info_dict': {
+ 'id': 'wuzetian',
+ 'title': '武媚娘传奇',
+ 'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
+ },
+ # This playlist contains some extra videos other than the drama itself
+ 'playlist_mincount': 96
+ }, {
+ 'url': 'http://tv.le.com/pzt/lswjzzjc/index.shtml',
+ # This series is moved to http://www.le.com/tv/10005297.html
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.le.com/comic/92063.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://list.le.com/listn/c1009_sc532002_d2_p1_o1.html',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if LeIE.suitable(url) else super(LePlaylistIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ page = self._download_webpage(url, playlist_id)
+
+ # Currently old domain names are still used in playlists
+ media_ids = orderedSet(re.findall(
+ r'<a[^>]+href="http://www\.letv\.com/ptv/vplay/(\d+)\.html', page))
+ entries = [self.url_result(LeIE._URL_TEMPLATE % media_id, ie='Le')
+ for media_id in media_ids]
+
+ title = self._html_search_meta('keywords', page,
+ fatal=False).split(',')[0]
+ description = self._html_search_meta('description', page, fatal=False)
+
+ return self.playlist_result(entries, playlist_id, playlist_title=title,
+ playlist_description=description)
+
+
+class LetvCloudIE(InfoExtractor):
+ # Most of *.letv.com is changed to *.le.com on 2016/01/02
+ # but yuntv.letv.com is kept, so also keep the extractor name
+ IE_DESC = '乐视云'
+ _VALID_URL = r'https?://yuntv\.letv\.com/bcloud.html\?.+'
+
+ _TESTS = [{
+ 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=467623dedf',
+ 'md5': '26450599afd64c513bc77030ad15db44',
+ 'info_dict': {
+ 'id': 'p7jnfw5hw9_467623dedf',
+ 'ext': 'mp4',
+ 'title': 'Video p7jnfw5hw9_467623dedf',
+ },
+ }, {
+ 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
+ 'md5': 'e03d9cc8d9c13191e1caf277e42dbd31',
+ 'info_dict': {
+ 'id': 'p7jnfw5hw9_ec93197892',
+ 'ext': 'mp4',
+ 'title': 'Video p7jnfw5hw9_ec93197892',
+ },
+ }, {
+ 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
+ 'md5': 'cb988699a776b22d4a41b9d43acfb3ac',
+ 'info_dict': {
+ 'id': 'p7jnfw5hw9_187060b6fd',
+ 'ext': 'mp4',
+ 'title': 'Video p7jnfw5hw9_187060b6fd',
+ },
+ }]
+
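+    # Requests are signed with md5 over the concatenated request fields plus a
+    # per-player salt; the flash and html5 players use different salts and field orders.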
+ @staticmethod
+ def sign_data(obj):
+ if obj['cf'] == 'flash':
+ salt = '2f9d6924b33a165a6d8b5d3d42f4f987'
+ items = ['cf', 'format', 'ran', 'uu', 'ver', 'vu']
+ elif obj['cf'] == 'html5':
+ salt = 'fbeh5player12c43eccf2bec3300344'
+ items = ['cf', 'ran', 'uu', 'bver', 'vu']
+ input_data = ''.join([item + obj[item] for item in items]) + salt
+ obj['sign'] = hashlib.md5(input_data.encode('utf-8')).hexdigest()
+
+ def _get_formats(self, cf, uu, vu, media_id):
+ def get_play_json(cf, timestamp):
+ data = {
+ 'cf': cf,
+ 'ver': '2.2',
+ 'bver': 'firefox44.0',
+ 'format': 'json',
+ 'uu': uu,
+ 'vu': vu,
+ 'ran': compat_str(timestamp),
+ }
+ self.sign_data(data)
+ return self._download_json(
+ 'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse_urlencode(data),
+ media_id, 'Downloading playJson data for type %s' % cf)
+
+ play_json = get_play_json(cf, time.time())
+ # The server time may be different from local time
+ if play_json.get('code') == 10071:
+ play_json = get_play_json(cf, play_json['timestamp'])
+
+ if not play_json.get('data'):
+ if play_json.get('message'):
+ raise ExtractorError('Letv cloud said: %s' % play_json['message'], expected=True)
+ elif play_json.get('code'):
+ raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True)
+ else:
+ raise ExtractorError('Letv cloud returned an unknown error')
+
+ def b64decode(s):
+ return compat_b64decode(s).decode('utf-8')
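+        # (main_url is base64 of the real media URL, whose basename is in
+        # turn base64 of the original file name; decoding twice recovers
+        # the extension, e.g. a basename decoding to 'clip.mp4' -- name
+        # illustrative -- gives ext 'mp4'.)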
+
+ formats = []
+ for media in play_json['data']['video_info']['media'].values():
+ play_url = media['play_url']
+ url = b64decode(play_url['main_url'])
+ decoded_url = b64decode(url_basename(url))
+ formats.append({
+ 'url': url,
+ 'ext': determine_ext(decoded_url),
+ 'format_id': str_or_none(play_url.get('vtype')),
+ 'format_note': str_or_none(play_url.get('definition')),
+ 'width': int_or_none(play_url.get('vwidth')),
+ 'height': int_or_none(play_url.get('vheight')),
+ })
+
+ return formats
+
+ def _real_extract(self, url):
+ uu_mobj = re.search(r'uu=([\w]+)', url)
+ vu_mobj = re.search(r'vu=([\w]+)', url)
+
+ if not uu_mobj or not vu_mobj:
+ raise ExtractorError('Invalid URL: %s' % url, expected=True)
+
+ uu = uu_mobj.group(1)
+ vu = vu_mobj.group(1)
+ media_id = uu + '_' + vu
+
+ formats = self._get_formats('flash', uu, vu, media_id) + self._get_formats('html5', uu, vu, media_id)
+ self._sort_formats(formats)
+
+ return {
+ 'id': media_id,
+ 'title': 'Video %s' % media_id,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/lego.py b/hypervideo_dl/extractor/lego.py
new file mode 100644
index 0000000..1e3c19d
--- /dev/null
+++ b/hypervideo_dl/extractor/lego.py
@@ -0,0 +1,149 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import uuid
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ qualities,
+)
+
+
+class LEGOIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[a-z]{2}-[a-z]{2})/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]{32})'
+ _TESTS = [{
+ 'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1',
+ 'md5': 'f34468f176cfd76488767fc162c405fa',
+ 'info_dict': {
+ 'id': '55492d82-3b1b-4d5e-9857-87fa8c2973b1_en-US',
+ 'ext': 'mp4',
+ 'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
+ 'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
+ },
+ }, {
+        # geo-restricted, but the contentUrl contains a valid URL
+ 'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399',
+ 'md5': 'c7420221f7ffd03ff056f9db7f8d807c',
+ 'info_dict': {
+ 'id': '13bdc229-9ab2-4d96-8570-1a915b3d71e7_nl-NL',
+ 'ext': 'mp4',
+ 'title': 'Aflevering 20: Helden van het koninkrijk',
+ 'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941',
+ 'age_limit': 5,
+ },
+ }, {
+ # with subtitle
+ 'url': 'https://www.lego.com/nl-nl/kids/videos/classic/creative-storytelling-the-little-puppy-aa24f27c7d5242bc86102ebdc0f24cba',
+ 'info_dict': {
+ 'id': 'aa24f27c-7d52-42bc-8610-2ebdc0f24cba_nl-NL',
+ 'ext': 'mp4',
+ 'title': 'De kleine puppy',
+ 'description': 'md5:5b725471f849348ac73f2e12cfb4be06',
+ 'age_limit': 1,
+ 'subtitles': {
+ 'nl': [{
+ 'ext': 'srt',
+ 'url': r're:^https://.+\.srt$',
+ }],
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _QUALITIES = {
+ 'Lowest': (64, 180, 320),
+ 'Low': (64, 270, 480),
+ 'Medium': (96, 360, 640),
+ 'High': (128, 540, 960),
+ 'Highest': (128, 720, 1280),
+ }
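+    # e.g. a progressive source whose Quality is 'Medium' is assumed to
+    # be 96 kbit/s audio at 640x360, per the (abr, height, width) table
+    # above.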
+
+ def _real_extract(self, url):
+ locale, video_id = re.match(self._VALID_URL, url).groups()
+ countries = [locale.split('-')[1].upper()]
+ self._initialize_geo_bypass({
+ 'countries': countries,
+ })
+
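+        # uuid.UUID() below re-inserts dashes into the 32-hex id from the
+        # URL, e.g. '55492d823b1b4d5e985787fa8c2973b1' ->
+        # '55492d82-3b1b-4d5e-9857-87fa8c2973b1'; combined with the locale
+        # this yields ids like '55492d82-...-87fa8c2973b1_en-US' (cf. the
+        # tests above).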
+ try:
+ item = self._download_json(
+ # https://contentfeed.services.lego.com/api/v2/item/[VIDEO_ID]?culture=[LOCALE]&contentType=Video
+ 'https://services.slingshot.lego.com/mediaplayer/v2',
+ video_id, query={
+ 'videoId': '%s_%s' % (uuid.UUID(video_id), locale),
+ }, headers=self.geo_verification_headers())
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 451:
+ self.raise_geo_restricted(countries=countries)
+ raise
+
+ video = item['Video']
+ video_id = video['Id']
+ title = video['Title']
+
+ q = qualities(['Lowest', 'Low', 'Medium', 'High', 'Highest'])
+ formats = []
+ for video_source in item.get('VideoFormats', []):
+ video_source_url = video_source.get('Url')
+ if not video_source_url:
+ continue
+ video_source_format = video_source.get('Format')
+ if video_source_format == 'F4M':
+ formats.extend(self._extract_f4m_formats(
+ video_source_url, video_id,
+ f4m_id=video_source_format, fatal=False))
+ elif video_source_format == 'M3U8':
+ formats.extend(self._extract_m3u8_formats(
+ video_source_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=video_source_format, fatal=False))
+ else:
+ video_source_quality = video_source.get('Quality')
+ format_id = []
+ for v in (video_source_format, video_source_quality):
+ if v:
+ format_id.append(v)
+ f = {
+ 'format_id': '-'.join(format_id),
+ 'quality': q(video_source_quality),
+ 'url': video_source_url,
+ }
+ quality = self._QUALITIES.get(video_source_quality)
+ if quality:
+ f.update({
+ 'abr': quality[0],
+ 'height': quality[1],
+ 'width': quality[2],
+                    })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ sub_file_id = video.get('SubFileId')
+ if sub_file_id and sub_file_id != '00000000-0000-0000-0000-000000000000':
+ net_storage_path = video.get('NetstoragePath')
+ invariant_id = video.get('InvariantId')
+ video_file_id = video.get('VideoFileId')
+ video_version = video.get('VideoVersion')
+ if net_storage_path and invariant_id and video_file_id and video_version:
+ subtitles.setdefault(locale[:2], []).append({
+ 'url': 'https://lc-mediaplayerns-live-s.legocdn.com/public/%s/%s_%s_%s_%s_sub.srt' % (net_storage_path, invariant_id, video_file_id, locale, video_version),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('Description'),
+ 'thumbnail': video.get('GeneratedCoverImage') or video.get('GeneratedThumbnail'),
+ 'duration': int_or_none(video.get('Length')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'age_limit': int_or_none(video.get('AgeFrom')),
+ 'season': video.get('SeasonTitle'),
+ 'season_number': int_or_none(video.get('Season')) or None,
+ 'episode_number': int_or_none(video.get('Episode')) or None,
+ }
diff --git a/hypervideo_dl/extractor/lemonde.py b/hypervideo_dl/extractor/lemonde.py
new file mode 100644
index 0000000..3306892
--- /dev/null
+++ b/hypervideo_dl/extractor/lemonde.py
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class LemondeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html',
+ 'md5': 'da120c8722d8632eec6ced937536cc98',
+ 'info_dict': {
+ 'id': 'lqm3kl',
+ 'ext': 'mp4',
+ 'title': "Comprendre l'affaire Bygmalion en 5 minutes",
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 309,
+ 'upload_date': '20160119',
+ 'timestamp': 1453194778,
+ 'uploader_id': '3pmkp',
+ },
+ }, {
+ # standard iframe embed
+ 'url': 'http://www.lemonde.fr/les-decodeurs/article/2016/10/18/tout-comprendre-du-ceta-le-petit-cousin-du-traite-transatlantique_5015920_4355770.html',
+ 'info_dict': {
+ 'id': 'uzsxms',
+ 'ext': 'mp4',
+ 'title': "CETA : quelles suites pour l'accord commercial entre l'Europe et le Canada ?",
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 325,
+ 'upload_date': '20161021',
+ 'timestamp': 1477044540,
+ 'uploader_id': '3pmkp',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html',
+ 'only_matching': True,
+ }, {
+ # YouTube embeds
+ 'url': 'http://www.lemonde.fr/pixels/article/2016/12/09/pourquoi-pewdiepie-superstar-de-youtube-a-menace-de-fermer-sa-chaine_5046649_4408996.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ digiteka_url = self._proto_relative_url(self._search_regex(
+ r'url\s*:\s*(["\'])(?P<url>(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1',
+ webpage, 'digiteka url', group='url', default=None))
+
+ if digiteka_url:
+ return self.url_result(digiteka_url, 'Digiteka')
+
+ return self.url_result(url, 'Generic')
diff --git a/hypervideo_dl/extractor/lenta.py b/hypervideo_dl/extractor/lenta.py
new file mode 100644
index 0000000..2ebd4e5
--- /dev/null
+++ b/hypervideo_dl/extractor/lenta.py
@@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class LentaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/',
+ 'info_dict': {
+ 'id': '964400',
+ 'ext': 'mp4',
+ 'title': 'Надежду Савченко задержали',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 61,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # EaglePlatform iframe embed
+ 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
+ 'info_dict': {
+ 'id': '227304',
+ 'ext': 'mp4',
+ 'title': 'Навальный вышел на свободу',
+ 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 87,
+ 'view_count': int,
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ r'vid\s*:\s*["\']?(\d+)', webpage, 'eagleplatform id',
+ default=None)
+ if video_id:
+ return self.url_result(
+ 'eagleplatform:lentaru.media.eagleplatform.com:%s' % video_id,
+ ie='EaglePlatform', video_id=video_id)
+
+ return self.url_result(url, ie='Generic')
diff --git a/hypervideo_dl/extractor/libraryofcongress.py b/hypervideo_dl/extractor/libraryofcongress.py
new file mode 100644
index 0000000..03f2051
--- /dev/null
+++ b/hypervideo_dl/extractor/libraryofcongress.py
@@ -0,0 +1,153 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ parse_filesize,
+)
+
+
+class LibraryOfCongressIE(InfoExtractor):
+ IE_NAME = 'loc'
+ IE_DESC = 'Library of Congress'
+ _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9a-z_.]+)'
+ _TESTS = [{
+ # embedded via <div class="media-player"
+ 'url': 'http://loc.gov/item/90716351/',
+ 'md5': '6ec0ae8f07f86731b1b2ff70f046210a',
+ 'info_dict': {
+ 'id': '90716351',
+ 'ext': 'mp4',
+ 'title': "Pa's trip to Mars",
+ 'duration': 0,
+ 'view_count': int,
+ },
+ }, {
+ # webcast embedded via mediaObjectId
+ 'url': 'https://www.loc.gov/today/cyberlc/feature_wdesc.php?rec=5578',
+ 'info_dict': {
+ 'id': '5578',
+ 'ext': 'mp4',
+ 'title': 'Help! Preservation Training Needs Here, There & Everywhere',
+ 'duration': 3765,
+ 'view_count': int,
+ 'subtitles': 'mincount:1',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # with direct download links
+ 'url': 'https://www.loc.gov/item/78710669/',
+ 'info_dict': {
+ 'id': '78710669',
+ 'ext': 'mp4',
+ 'title': 'La vie et la passion de Jesus-Christ',
+ 'duration': 0,
+ 'view_count': int,
+ 'formats': 'mincount:4',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.loc.gov/item/ihas.200197114/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.loc.gov/item/afc1981005_afs20503/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ media_id = self._search_regex(
+ (r'id=(["\'])media-player-(?P<id>.+?)\1',
+ r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
+ r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1',
+ r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1',
+ r'data-tab="share-media-(?P<id>[0-9A-F]{32})"'),
+ webpage, 'media id', group='id')
+
+ data = self._download_json(
+ 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
+ media_id)['mediaObject']
+
+ derivative = data['derivatives'][0]
+ media_url = derivative['derivativeUrl']
+
+ title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(
+ webpage)
+
+        # The following algorithm was extracted from the setAVSource JS
+        # function found in the webpage
+ media_url = media_url.replace('rtmp', 'https')
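+        # e.g. 'rtmp://host/vod/mp4:item' -> 'https://host/vod/mp4:item';
+        # for such URLs the HLS variant below further rewrites
+        # '/vod/mp4:' to '/hls-vod/media/' and appends '.m3u8'
+        # (host/path illustrative).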
+
+ is_video = data.get('mediaType', 'v').lower() == 'v'
+ ext = determine_ext(media_url)
+ if ext not in ('mp4', 'mp3'):
+ media_url += '.mp4' if is_video else '.mp3'
+
+ formats = []
+ if '/vod/mp4:' in media_url:
+ formats.append({
+ 'url': media_url.replace('/vod/mp4:', '/hls-vod/media/') + '.m3u8',
+ 'format_id': 'hls',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'quality': 1,
+ })
+ http_format = {
+ 'url': re.sub(r'(://[^/]+/)(?:[^/]+/)*(?:mp4|mp3):', r'\1', media_url),
+ 'format_id': 'http',
+ 'quality': 1,
+ }
+ if not is_video:
+ http_format['vcodec'] = 'none'
+ formats.append(http_format)
+
+ download_urls = set()
+ for m in re.finditer(
+ r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?:&nbsp;|\s+)\((?P<size>.+?)\))?\s*<', webpage):
+ format_id = m.group('id').lower()
+ if format_id in ('gif', 'jpeg'):
+ continue
+ download_url = m.group('url')
+ if download_url in download_urls:
+ continue
+ download_urls.add(download_url)
+ formats.append({
+ 'url': download_url,
+ 'format_id': format_id,
+ 'filesize_approx': parse_filesize(m.group('size')),
+ })
+
+ self._sort_formats(formats)
+
+ duration = float_or_none(data.get('duration'))
+ view_count = int_or_none(data.get('viewCount'))
+
+ subtitles = {}
+ cc_url = data.get('ccUrl')
+ if cc_url:
+ subtitles.setdefault('en', []).append({
+ 'url': cc_url,
+ 'ext': 'ttml',
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/libsyn.py b/hypervideo_dl/extractor/libsyn.py
new file mode 100644
index 0000000..2cf4442
--- /dev/null
+++ b/hypervideo_dl/extractor/libsyn.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ get_element_by_class,
+ parse_duration,
+ strip_or_none,
+ unified_strdate,
+)
+
+
+class LibsynIE(InfoExtractor):
+ _VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
+
+ _TESTS = [{
+ 'url': 'http://html5-player.libsyn.com/embed/episode/id/6385796/',
+ 'md5': '2a55e75496c790cdeb058e7e6c087746',
+ 'info_dict': {
+ 'id': '6385796',
+ 'ext': 'mp3',
+ 'title': "Champion Minded - Developing a Growth Mindset",
+ # description fetched using another request:
+ # http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796
+ # 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
+ 'upload_date': '20180320',
+ 'thumbnail': 're:^https?://.*',
+ },
+ }, {
+ 'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/',
+ 'md5': '6c5cb21acd622d754d3b1a92b582ce42',
+ 'info_dict': {
+ 'id': '3727166',
+ 'ext': 'mp3',
+ 'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career',
+ 'upload_date': '20150818',
+ 'thumbnail': 're:^https?://.*',
+ }
+ }]
+
+ def _real_extract(self, url):
+ url, video_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._parse_json(self._search_regex(
+ r'var\s+playlistItem\s*=\s*({.+?});',
+ webpage, 'JSON data block'), video_id)
+
+ episode_title = data.get('item_title') or get_element_by_class('episode-title', webpage)
+ if not episode_title:
+            episode_title = self._search_regex(
+ [r'data-title="([^"]+)"', r'<title>(.+?)</title>'],
+ webpage, 'episode title')
+ episode_title = episode_title.strip()
+
+ podcast_title = strip_or_none(clean_html(self._search_regex(
+ r'<h3>([^<]+)</h3>', webpage, 'podcast title',
+ default=None) or get_element_by_class('podcast-title', webpage)))
+
+ title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
+
+ formats = []
+ for k, format_id in (('media_url_libsyn', 'libsyn'), ('media_url', 'main'), ('download_link', 'download')):
+ f_url = data.get(k)
+ if not f_url:
+ continue
+ formats.append({
+ 'url': f_url,
+ 'format_id': format_id,
+ })
+
+ description = self._html_search_regex(
+ r'<p\s+id="info_text_body">(.+?)</p>', webpage,
+ 'description', default=None)
+ if description:
+ # Strip non-breaking and normal spaces
+ description = description.replace('\u00A0', ' ').strip()
+ release_date = unified_strdate(self._search_regex(
+ r'<div class="release_date">Released: ([^<]+)<',
+ webpage, 'release date', default=None) or data.get('release_date'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': data.get('thumbnail_url'),
+ 'upload_date': release_date,
+ 'duration': parse_duration(data.get('duration')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/lifenews.py b/hypervideo_dl/extractor/lifenews.py
new file mode 100644
index 0000000..42e263b
--- /dev/null
+++ b/hypervideo_dl/extractor/lifenews.py
@@ -0,0 +1,239 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ remove_end,
+)
+
+
+class LifeNewsIE(InfoExtractor):
+ IE_NAME = 'life'
+ IE_DESC = 'Life.ru'
+ _VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)'
+
+ _TESTS = [{
+ # single video embedded via video/source
+ 'url': 'https://life.ru/t/новости/98736',
+ 'md5': '77c95eaefaca216e32a76a343ad89d23',
+ 'info_dict': {
+ 'id': '98736',
+ 'ext': 'mp4',
+ 'title': 'Мужчина нашел дома архив оборонного завода',
+ 'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
+ 'timestamp': 1344154740,
+ 'upload_date': '20120805',
+ 'view_count': int,
+ }
+ }, {
+ # single video embedded via iframe
+ 'url': 'https://life.ru/t/новости/152125',
+ 'md5': '77d19a6f0886cd76bdbf44b4d971a273',
+ 'info_dict': {
+ 'id': '152125',
+ 'ext': 'mp4',
+ 'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
+ 'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
+ 'timestamp': 1427961840,
+ 'upload_date': '20150402',
+ 'view_count': int,
+ }
+ }, {
+ # two videos embedded via iframe
+ 'url': 'https://life.ru/t/новости/153461',
+ 'info_dict': {
+ 'id': '153461',
+ 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
+ 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
+ 'timestamp': 1430825520,
+ 'view_count': int,
+ },
+ 'playlist': [{
+ 'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
+ 'info_dict': {
+ 'id': '153461-video1',
+ 'ext': 'mp4',
+ 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
+ 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
+ 'timestamp': 1430825520,
+ 'upload_date': '20150505',
+ },
+ }, {
+ 'md5': 'ebb3bf3b1ce40e878d0d628e93eb0322',
+ 'info_dict': {
+ 'id': '153461-video2',
+ 'ext': 'mp4',
+ 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
+ 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
+ 'timestamp': 1430825520,
+ 'upload_date': '20150505',
+ },
+ }],
+ }, {
+ 'url': 'https://life.ru/t/новости/213035',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_urls = re.findall(
+ r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
+
+ iframe_links = re.findall(
+ r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/(?:embed|video)/.+?)["\']',
+ webpage)
+
+ if not video_urls and not iframe_links:
+ raise ExtractorError('No media links available for %s' % video_id)
+
+ title = remove_end(
+ self._og_search_title(webpage),
+ ' - Life.ru')
+
+ description = self._og_search_description(webpage)
+
+ view_count = self._html_search_regex(
+ r'<div[^>]+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P<value>\d+)\s*</div>',
+ webpage, 'view count', fatal=False, group='value')
+
+ timestamp = parse_iso8601(self._search_regex(
+ r'<time[^>]+datetime=(["\'])(?P<value>.+?)\1',
+ webpage, 'upload date', fatal=False, group='value'))
+
+ common_info = {
+ 'description': description,
+ 'view_count': int_or_none(view_count),
+ 'timestamp': timestamp,
+ }
+
+ def make_entry(video_id, video_url, index=None):
+ cur_info = dict(common_info)
+ cur_info.update({
+ 'id': video_id if not index else '%s-video%s' % (video_id, index),
+ 'url': video_url,
+ 'title': title if not index else '%s (Видео %s)' % (title, index),
+ })
+ return cur_info
+
+ def make_video_entry(video_id, video_url, index=None):
+ video_url = compat_urlparse.urljoin(url, video_url)
+ return make_entry(video_id, video_url, index)
+
+ def make_iframe_entry(video_id, video_url, index=None):
+ video_url = self._proto_relative_url(video_url, 'http:')
+ cur_info = make_entry(video_id, video_url, index)
+ cur_info['_type'] = 'url_transparent'
+ return cur_info
+
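+        # E.g. for page 153461 with two iframe embeds, the helpers above
+        # produce ids '153461-video1'/'153461-video2' and titles suffixed
+        # ' (Видео 1)' / ' (Видео 2)', matching the playlist test above.
+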
+ if len(video_urls) == 1 and not iframe_links:
+ return make_video_entry(video_id, video_urls[0])
+
+ if len(iframe_links) == 1 and not video_urls:
+ return make_iframe_entry(video_id, iframe_links[0])
+
+ entries = []
+
+ if video_urls:
+ for num, video_url in enumerate(video_urls, 1):
+ entries.append(make_video_entry(video_id, video_url, num))
+
+ if iframe_links:
+ for num, iframe_link in enumerate(iframe_links, len(video_urls) + 1):
+ entries.append(make_iframe_entry(video_id, iframe_link, num))
+
+ playlist = common_info.copy()
+ playlist.update(self.playlist_result(entries, video_id, title, description))
+ return playlist
+
+
+class LifeEmbedIE(InfoExtractor):
+ IE_NAME = 'life:embed'
+ _VALID_URL = r'https?://embed\.life\.ru/(?:embed|video)/(?P<id>[\da-f]{32})'
+
+ _TESTS = [{
+ 'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
+ 'md5': 'b889715c9e49cb1981281d0e5458fbbe',
+ 'info_dict': {
+ 'id': 'e50c2dec2867350528e2574c899b8291',
+ 'ext': 'mp4',
+ 'title': 'e50c2dec2867350528e2574c899b8291',
+ 'thumbnail': r're:http://.*\.jpg',
+ }
+ }, {
+ # with 1080p
+ 'url': 'https://embed.life.ru/video/e50c2dec2867350528e2574c899b8291',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ thumbnail = None
+ formats = []
+
+ def extract_m3u8(manifest_url):
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='m3u8'))
+
+ def extract_original(original_url):
+ formats.append({
+ 'url': original_url,
+ 'format_id': determine_ext(original_url, None),
+ 'preference': 1,
+ })
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'options\s*=\s*({.+?});', webpage, 'options', default='{}'),
+ video_id).get('playlist', {})
+ if playlist:
+ master = playlist.get('master')
+ if isinstance(master, compat_str) and determine_ext(master) == 'm3u8':
+ extract_m3u8(compat_urlparse.urljoin(url, master))
+ original = playlist.get('original')
+ if isinstance(original, compat_str):
+ extract_original(original)
+ thumbnail = playlist.get('image')
+
+ # Old rendition fallback
+ if not formats:
+ for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
+ video_url = compat_urlparse.urljoin(url, video_url)
+ if determine_ext(video_url) == 'm3u8':
+ extract_m3u8(video_url)
+ else:
+ extract_original(video_url)
+
+ self._sort_formats(formats)
+
+ thumbnail = thumbnail or self._search_regex(
+ r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/limelight.py b/hypervideo_dl/extractor/limelight.py
new file mode 100644
index 0000000..39f74d2
--- /dev/null
+++ b/hypervideo_dl/extractor/limelight.py
@@ -0,0 +1,358 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ smuggle_url,
+ try_get,
+ unsmuggle_url,
+ ExtractorError,
+)
+
+
+class LimelightBaseIE(InfoExtractor):
+ _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
+
+ @classmethod
+ def _extract_urls(cls, webpage, source_url):
+ lm = {
+ 'Media': 'media',
+ 'Channel': 'channel',
+ 'ChannelList': 'channel_list',
+ }
+
+ def smuggle(url):
+ return smuggle_url(url, {'source_url': source_url})
+
+ entries = []
+ for kind, video_id in re.findall(
+ r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
+ webpage):
+ entries.append(cls.url_result(
+ smuggle('limelight:%s:%s' % (lm[kind], video_id)),
+ 'Limelight%s' % kind, video_id))
+ for mobj in re.finditer(
+                # As per [1], the class attribute should be exactly
+                # LimelightEmbeddedPlayerFlash, but numerous examples have
+                # been seen that don't match it exactly (e.g. [2]).
+ # 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
+ # 2. http://www.sedona.com/FacilitatorTraining2017
+ r'''(?sx)
+ <object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
+ <param[^>]+
+ name=(["\'])flashVars\2[^>]+
+ value=(["\'])(?:(?!\3).)*(?P<kind>media|channel(?:List)?)Id=(?P<id>[a-z0-9]{32})
+ ''', webpage):
+ kind, video_id = mobj.group('kind'), mobj.group('id')
+ entries.append(cls.url_result(
+ smuggle('limelight:%s:%s' % (kind, video_id)),
+ 'Limelight%s' % kind.capitalize(), video_id))
+ # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page)
+ for video_id in re.findall(
+ r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
+ webpage):
+ entries.append(cls.url_result(
+ smuggle('limelight:media:%s' % video_id),
+ LimelightMediaIE.ie_key(), video_id))
+ return entries
+
+ def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
+ headers = {}
+ if referer:
+ headers['Referer'] = referer
+ try:
+ return self._download_json(
+ self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
+ item_id, 'Downloading PlaylistService %s JSON' % method,
+ fatal=fatal, headers=headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
+ if error == 'CountryDisabled':
+ self.raise_geo_restricted()
+ raise ExtractorError(error, expected=True)
+ raise
+
+ def _extract(self, item_id, pc_method, mobile_method, referer=None):
+ pc = self._call_playlist_service(item_id, pc_method, referer=referer)
+ mobile = self._call_playlist_service(
+ item_id, mobile_method, fatal=False, referer=referer)
+ return pc, mobile
+
+ def _extract_info(self, pc, mobile, i, referer):
+ get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {}
+ pc_item = get_item(pc, 'playlistItems')
+ mobile_item = get_item(mobile, 'mediaList')
+ video_id = pc_item.get('mediaId') or mobile_item['mediaId']
+ title = pc_item.get('title') or mobile_item['title']
+
+ formats = []
+ urls = []
+ for stream in pc_item.get('streams', []):
+ stream_url = stream.get('url')
+ if not stream_url or stream.get('drmProtected') or stream_url in urls:
+ continue
+ urls.append(stream_url)
+ ext = determine_ext(stream_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ stream_url, video_id, f4m_id='hds', fatal=False))
+ else:
+ fmt = {
+ 'url': stream_url,
+ 'abr': float_or_none(stream.get('audioBitRate')),
+ 'fps': float_or_none(stream.get('videoFrameRate')),
+ 'ext': ext,
+ }
+ width = int_or_none(stream.get('videoWidthInPixels'))
+ height = int_or_none(stream.get('videoHeightInPixels'))
+ vbr = float_or_none(stream.get('videoBitRate'))
+ if width or height or vbr:
+ fmt.update({
+ 'width': width,
+ 'height': height,
+ 'vbr': vbr,
+ })
+ else:
+ fmt['vcodec'] = 'none'
+ rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
+ if rtmp:
+ format_id = 'rtmp'
+ if stream.get('videoBitRate'):
+ format_id += '-%d' % int_or_none(stream['videoBitRate'])
+ http_format_id = format_id.replace('rtmp', 'http')
+
+ CDN_HOSTS = (
+ ('delvenetworks.com', 'cpl.delvenetworks.com'),
+ ('video.llnw.net', 's2.content.video.llnw.net'),
+ )
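+                    # For streams on a known CDN, an equivalent
+                    # progressive HTTP URL can be derived by swapping in
+                    # the HTTP host and stripping the 'mp4:' playpath
+                    # prefix, e.g. (path made up)
+                    # 'rtmpe://a.delvenetworks.com/x/mp4:v/clip.mp4' ->
+                    # 'http://cpl.delvenetworks.com/v/clip.mp4'.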
+ for cdn_host, http_host in CDN_HOSTS:
+ if cdn_host not in rtmp.group('host').lower():
+ continue
+ http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
+ urls.append(http_url)
+ if self._is_valid_url(http_url, video_id, http_format_id):
+ http_fmt = fmt.copy()
+ http_fmt.update({
+ 'url': http_url,
+ 'format_id': http_format_id,
+ })
+ formats.append(http_fmt)
+ break
+
+ fmt.update({
+ 'url': rtmp.group('url'),
+ 'play_path': rtmp.group('playpath'),
+ 'app': rtmp.group('app'),
+ 'ext': 'flv',
+ 'format_id': format_id,
+ })
+ formats.append(fmt)
+
+ for mobile_url in mobile_item.get('mobileUrls', []):
+ media_url = mobile_url.get('mobileUrl')
+ format_id = mobile_url.get('targetMediaPlatform')
+ if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
+ continue
+ urls.append(media_url)
+ ext = determine_ext(media_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+                    media_url, video_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'url': media_url,
+ 'format_id': format_id,
+ 'preference': -1,
+ 'ext': ext,
+ })
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+        for flag in mobile_item.get('flags') or []:
+ if flag == 'ClosedCaptions':
+ closed_captions = self._call_playlist_service(
+ video_id, 'getClosedCaptionsDetailsByMediaId',
+ False, referer) or []
+ for cc in closed_captions:
+ cc_url = cc.get('webvttFileUrl')
+ if not cc_url:
+ continue
+                    lang = cc.get('languageCode') or self._search_regex(r'/([a-z]{2})\.vtt', cc_url, 'lang', default='en')
+ subtitles.setdefault(lang, []).append({
+ 'url': cc_url,
+ })
+ break
+
+ get_meta = lambda x: pc_item.get(x) or mobile_item.get(x)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': get_meta('description'),
+ 'formats': formats,
+ 'duration': float_or_none(get_meta('durationInMilliseconds'), 1000),
+ 'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'),
+ 'subtitles': subtitles,
+ }
+
+
+class LimelightMediaIE(LimelightBaseIE):
+ IE_NAME = 'limelight'
+ _VALID_URL = r'''(?x)
+ (?:
+ limelight:media:|
+ https?://
+ (?:
+ link\.videoplatform\.limelight\.com/media/|
+ assets\.delvenetworks\.com/player/loader\.swf
+ )
+ \?.*?\bmediaId=
+ )
+ (?P<id>[a-z0-9]{32})
+ '''
+ _TESTS = [{
+ 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
+ 'info_dict': {
+ 'id': '3ffd040b522b4485b6d84effc750cd86',
+ 'ext': 'mp4',
+ 'title': 'HaP and the HB Prince Trailer',
+ 'description': 'md5:8005b944181778e313d95c1237ddb640',
+ 'thumbnail': r're:^https?://.*\.jpeg$',
+ 'duration': 144.23,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # video with subtitles
+ 'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
+ 'md5': '2fa3bad9ac321e23860ca23bc2c69e3d',
+ 'info_dict': {
+ 'id': 'a3e00274d4564ec4a9b29b9466432335',
+ 'ext': 'mp4',
+ 'title': '3Play Media Overview Video',
+ 'thumbnail': r're:^https?://.*\.jpeg$',
+ 'duration': 78.101,
+ # TODO: extract all languages that were accessible via API
+ # 'subtitles': 'mincount:9',
+ 'subtitles': 'mincount:1',
+ },
+ }, {
+ 'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
+ 'only_matching': True,
+ }]
+ _PLAYLIST_SERVICE_PATH = 'media'
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ video_id = self._match_id(url)
+ source_url = smuggled_data.get('source_url')
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
+
+ pc, mobile = self._extract(
+ video_id, 'getPlaylistByMediaId',
+ 'getMobilePlaylistByMediaId', source_url)
+
+ return self._extract_info(pc, mobile, 0, source_url)
+
+
+class LimelightChannelIE(LimelightBaseIE):
+ IE_NAME = 'limelight:channel'
+ _VALID_URL = r'''(?x)
+ (?:
+ limelight:channel:|
+ https?://
+ (?:
+ link\.videoplatform\.limelight\.com/media/|
+ assets\.delvenetworks\.com/player/loader\.swf
+ )
+ \?.*?\bchannelId=
+ )
+ (?P<id>[a-z0-9]{32})
+ '''
+ _TESTS = [{
+ 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
+ 'info_dict': {
+ 'id': 'ab6a524c379342f9b23642917020c082',
+ 'title': 'Javascript Sample Code',
+ 'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
+ 'only_matching': True,
+ }]
+ _PLAYLIST_SERVICE_PATH = 'channel'
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ channel_id = self._match_id(url)
+ source_url = smuggled_data.get('source_url')
+
+ pc, mobile = self._extract(
+ channel_id, 'getPlaylistByChannelId',
+ 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
+ source_url)
+
+ entries = [
+ self._extract_info(pc, mobile, i, source_url)
+ for i in range(len(pc['playlistItems']))]
+
+ return self.playlist_result(
+ entries, channel_id, pc.get('title'), mobile.get('description'))
+
+
+class LimelightChannelListIE(LimelightBaseIE):
+ IE_NAME = 'limelight:channel_list'
+ _VALID_URL = r'''(?x)
+ (?:
+ limelight:channel_list:|
+ https?://
+ (?:
+ link\.videoplatform\.limelight\.com/media/|
+ assets\.delvenetworks\.com/player/loader\.swf
+ )
+ \?.*?\bchannelListId=
+ )
+ (?P<id>[a-z0-9]{32})
+ '''
+ _TESTS = [{
+ 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
+ 'info_dict': {
+ 'id': '301b117890c4465c8179ede21fd92e2b',
+ 'title': 'Website - Hero Player',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
+ 'only_matching': True,
+ }]
+ _PLAYLIST_SERVICE_PATH = 'channel_list'
+
+ def _real_extract(self, url):
+ channel_list_id = self._match_id(url)
+
+ channel_list = self._call_playlist_service(
+ channel_list_id, 'getMobileChannelListById')
+
+ entries = [
+ self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
+ for channel in channel_list['channelList']]
+
+ return self.playlist_result(
+ entries, channel_list_id, channel_list['title'])
diff --git a/hypervideo_dl/extractor/line.py b/hypervideo_dl/extractor/line.py
new file mode 100644
index 0000000..2526daa
--- /dev/null
+++ b/hypervideo_dl/extractor/line.py
@@ -0,0 +1,230 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ str_or_none,
+)
+
+
+class LineTVIE(InfoExtractor):
+ _VALID_URL = r'https?://tv\.line\.me/v/(?P<id>\d+)_[^/]+-(?P<segment>ep\d+-\d+)'
+
+ _TESTS = [{
+ 'url': 'https://tv.line.me/v/793123_goodbye-mrblack-ep1-1/list/69246',
+ 'info_dict': {
+ 'id': '793123_ep1-1',
+ 'ext': 'mp4',
+ 'title': 'Goodbye Mr.Black | EP.1-1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 998.509,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://tv.line.me/v/2587507_%E6%B4%BE%E9%81%A3%E5%A5%B3%E9%86%ABx-ep1-02/list/185245',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ series_id, segment = re.match(self._VALID_URL, url).groups()
+ video_id = '%s_%s' % (series_id, segment)
+
+ webpage = self._download_webpage(url, video_id)
+
+ player_params = self._parse_json(self._search_regex(
+ r'naver\.WebPlayer\(({[^}]+})\)', webpage, 'player parameters'),
+ video_id, transform_source=js_to_json)
+
+ video_info = self._download_json(
+ 'https://global-nvapis.line.me/linetv/rmcnmv/vod_play_videoInfo.json',
+ video_id, query={
+ 'videoId': player_params['videoId'],
+ 'key': player_params['key'],
+ })
+
+ stream = video_info['streams'][0]
+ extra_query = '?__gda__=' + stream['key']['value']
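+        # The '__gda__' token from the stream key has to accompany the
+        # master playlist and, as it seems, every segment request as
+        # well, hence it is appended to each format URL below and passed
+        # on via 'extra_param_to_segment_url'.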
+ formats = self._extract_m3u8_formats(
+ stream['source'] + extra_query, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+
+ for a_format in formats:
+ a_format['url'] += extra_query
+
+ duration = None
+ for video in video_info.get('videos', {}).get('list', []):
+ encoding_option = video.get('encodingOption', {})
+ abr = video['bitrate']['audio']
+ vbr = video['bitrate']['video']
+ tbr = abr + vbr
+ formats.append({
+ 'url': video['source'],
+ 'format_id': 'http-%d' % int(tbr),
+ 'height': encoding_option.get('height'),
+ 'width': encoding_option.get('width'),
+ 'abr': abr,
+ 'vbr': vbr,
+ 'filesize': video.get('size'),
+ })
+ if video.get('duration') and duration is None:
+ duration = video['duration']
+
+ self._sort_formats(formats)
+
+ if not formats[0].get('width'):
+ formats[0]['vcodec'] = 'none'
+
+ title = self._og_search_title(webpage)
+
+ # like_count requires an additional API request https://tv.line.me/api/likeit/getCount
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'extra_param_to_segment_url': extra_query[1:],
+ 'duration': duration,
+ 'thumbnails': [{'url': thumbnail['source']}
+ for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
+ 'view_count': video_info.get('meta', {}).get('count'),
+ }
+
+
+class LineLiveBaseIE(InfoExtractor):
+ _API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'
+
+ def _parse_broadcast_item(self, item):
+ broadcast_id = compat_str(item['id'])
+ title = item['title']
+ is_live = item.get('isBroadcastingNow')
+
+ thumbnails = []
+ for thumbnail_id, thumbnail_url in (item.get('thumbnailURLs') or {}).items():
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'id': thumbnail_id,
+ 'url': thumbnail_url,
+ })
+
+ channel = item.get('channel') or {}
+ channel_id = str_or_none(channel.get('id'))
+
+ return {
+ 'id': broadcast_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'thumbnails': thumbnails,
+ 'timestamp': int_or_none(item.get('createdAt')),
+ 'channel': channel.get('name'),
+ 'channel_id': channel_id,
+ 'channel_url': 'https://live.line.me/channels/' + channel_id if channel_id else None,
+ 'duration': int_or_none(item.get('archiveDuration')),
+ 'view_count': int_or_none(item.get('viewerCount')),
+ 'comment_count': int_or_none(item.get('chatCount')),
+ 'is_live': is_live,
+ }
+
+
+class LineLiveIE(LineLiveBaseIE):
+ _VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://live.line.me/channels/4867368/broadcast/16331360',
+ 'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3',
+ 'info_dict': {
+ 'id': '16331360',
+ 'title': '振りコピ講座😙😙😙',
+ 'ext': 'mp4',
+ 'timestamp': 1617095132,
+ 'upload_date': '20210330',
+ 'channel': '白川ゆめか',
+ 'channel_id': '4867368',
+ 'view_count': int,
+ 'comment_count': int,
+ 'is_live': False,
+ }
+ }, {
+ # archiveStatus == 'DELETED'
+ 'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel_id, broadcast_id = re.match(self._VALID_URL, url).groups()
+ broadcast = self._download_json(
+ self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
+ broadcast_id)
+ item = broadcast['item']
+ info = self._parse_broadcast_item(item)
+ protocol = 'm3u8' if info['is_live'] else 'm3u8_native'
+ formats = []
+ for k, v in (broadcast.get(('live' if info['is_live'] else 'archived') + 'HLSURLs') or {}).items():
+ if not v:
+ continue
+ if k == 'abr':
+ formats.extend(self._extract_m3u8_formats(
+ v, broadcast_id, 'mp4', protocol,
+ m3u8_id='hls', fatal=False))
+ continue
+ f = {
+ 'ext': 'mp4',
+ 'format_id': 'hls-' + k,
+ 'protocol': protocol,
+ 'url': v,
+ }
+ if not k.isdigit():
+ f['vcodec'] = 'none'
+ formats.append(f)
+ if not formats:
+ archive_status = item.get('archiveStatus')
+            if archive_status != 'ARCHIVED':
+                raise ExtractorError(
+                    'This video has been %s' % (archive_status or 'deleted').lower(),
+                    expected=True)
+ self._sort_formats(formats)
+ info['formats'] = formats
+ return info
+
+
+class LineLiveChannelIE(LineLiveBaseIE):
+ _VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
+ _TEST = {
+ 'url': 'https://live.line.me/channels/5893542',
+ 'info_dict': {
+ 'id': '5893542',
+ 'title': 'いくらちゃん',
+ 'description': 'md5:c3a4af801f43b2fac0b02294976580be',
+ },
+ 'playlist_mincount': 29
+ }
+
+ def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
+ while True:
+ for row in (archived_broadcasts.get('rows') or []):
+ share_url = str_or_none(row.get('shareURL'))
+ if not share_url:
+ continue
+ info = self._parse_broadcast_item(row)
+ info.update({
+ '_type': 'url',
+ 'url': share_url,
+ 'ie_key': LineLiveIE.ie_key(),
+ })
+ yield info
+ if not archived_broadcasts.get('hasNextPage'):
+ return
+ archived_broadcasts = self._download_json(
+ self._API_BASE_URL + channel_id + '/archived_broadcasts',
+ channel_id, query={
+ 'lastId': info['id'],
+ })
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
+ return self.playlist_result(
+ self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
+ channel_id, channel.get('title'), channel.get('information'))
diff --git a/hypervideo_dl/extractor/linkedin.py b/hypervideo_dl/extractor/linkedin.py
new file mode 100644
index 0000000..26fc703
--- /dev/null
+++ b/hypervideo_dl/extractor/linkedin.py
@@ -0,0 +1,182 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class LinkedInLearningBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'linkedin'
+ _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning'
+
+ def _call_api(self, course_slug, fields, video_slug=None, resolution=None):
+ query = {
+ 'courseSlug': course_slug,
+ 'fields': fields,
+ 'q': 'slugs',
+ }
+ sub = ''
+ if video_slug:
+ query.update({
+ 'videoSlug': video_slug,
+ 'resolution': '_%s' % resolution,
+ })
+ sub = ' %dp' % resolution
+ api_url = 'https://www.linkedin.com/learning-api/detailedCourses'
+ return self._download_json(
+ api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={
+ 'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
+ }, query=query)['elements'][0]
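+
+    # Examples from the subclasses below:
+    #   self._call_api(course_slug, 'selectedVideo', video_slug, 720)
+    # fetches the 720p rendition of a single video, while
+    #   self._call_api(course_slug, 'chapters,description,title')
+    # (no video slug) fetches course-level metadata.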
+
+ def _get_urn_id(self, video_data):
+ urn = video_data.get('urn')
+ if urn:
+ mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
+ if mobj:
+ return mobj.group(1)
+
+ def _get_video_id(self, video_data, course_slug, video_slug):
+ return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug)
+
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+ action_url = urljoin(self._LOGIN_URL, self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url',
+ default='https://www.linkedin.com/uas/login-submit', group='url'))
+ data = self._hidden_inputs(login_page)
+ data.update({
+ 'session_key': email,
+ 'session_password': password,
+ })
+ login_submit_page = self._download_webpage(
+ action_url, None, 'Logging in',
+ data=urlencode_postdata(data))
+ error = self._search_regex(
+ r'<span[^>]+class="error"[^>]*>\s*(.+?)\s*</span>',
+ login_submit_page, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
+
+class LinkedInLearningIE(LinkedInLearningBaseIE):
+ IE_NAME = 'linkedin:learning'
+ _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<course_slug>[^/]+)/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals/welcome?autoplay=true',
+ 'md5': 'a1d74422ff0d5e66a792deb996693167',
+ 'info_dict': {
+ 'id': '90426',
+ 'ext': 'mp4',
+ 'title': 'Welcome',
+ 'timestamp': 1430396150.82,
+ 'upload_date': '20150430',
+ },
+ }
+
+ def _real_extract(self, url):
+ course_slug, video_slug = re.match(self._VALID_URL, url).groups()
+
+ video_data = None
+ formats = []
+ for width, height in ((640, 360), (960, 540), (1280, 720)):
+ video_data = self._call_api(
+ course_slug, 'selectedVideo', video_slug, height)['selectedVideo']
+
+ video_url_data = video_data.get('url') or {}
+ progressive_url = video_url_data.get('progressiveUrl')
+ if progressive_url:
+ formats.append({
+ 'format_id': 'progressive-%dp' % height,
+ 'url': progressive_url,
+ 'height': height,
+ 'width': width,
+ 'source_preference': 1,
+ })
+
+ title = video_data['title']
+
+ audio_url = video_data.get('audio', {}).get('progressiveUrl')
+ if audio_url:
+ formats.append({
+ 'abr': 64,
+ 'ext': 'm4a',
+ 'format_id': 'audio',
+ 'url': audio_url,
+ 'vcodec': 'none',
+ })
+
+ streaming_url = video_url_data.get('streamingUrl')
+ if streaming_url:
+ formats.extend(self._extract_m3u8_formats(
+ streaming_url, video_slug, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
+
+ return {
+ 'id': self._get_video_id(video_data, course_slug, video_slug),
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': video_data.get('defaultThumbnail'),
+ 'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
+ 'duration': int_or_none(video_data.get('durationInSeconds')),
+ }
+
+
+class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
+ IE_NAME = 'linkedin:learning:course'
+ _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals',
+ 'info_dict': {
+ 'id': 'programming-foundations-fundamentals',
+ 'title': 'Programming Foundations: Fundamentals',
+ 'description': 'md5:76e580b017694eb89dc8e8923fff5c86',
+ },
+ 'playlist_mincount': 61,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if LinkedInLearningIE.suitable(url) else super(LinkedInLearningCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ course_slug = self._match_id(url)
+ course_data = self._call_api(course_slug, 'chapters,description,title')
+
+ entries = []
+ for chapter_number, chapter in enumerate(course_data.get('chapters', []), 1):
+ chapter_title = chapter.get('title')
+ chapter_id = self._get_urn_id(chapter)
+ for video in chapter.get('videos', []):
+ video_slug = video.get('slug')
+ if not video_slug:
+ continue
+ entries.append({
+ '_type': 'url_transparent',
+ 'id': self._get_video_id(video, course_slug, video_slug),
+ 'title': video.get('title'),
+ 'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
+ 'chapter': chapter_title,
+ 'chapter_number': chapter_number,
+ 'chapter_id': chapter_id,
+ 'ie_key': LinkedInLearningIE.ie_key(),
+ })
+
+ return self.playlist_result(
+ entries, course_slug,
+ course_data.get('title'),
+ course_data.get('description'))
diff --git a/hypervideo_dl/extractor/linuxacademy.py b/hypervideo_dl/extractor/linuxacademy.py
new file mode 100644
index 0000000..7ec4a65
--- /dev/null
+++ b/hypervideo_dl/extractor/linuxacademy.py
@@ -0,0 +1,243 @@
+from __future__ import unicode_literals
+
+import json
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ js_to_json,
+ parse_duration,
+ try_get,
+ unified_timestamp,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class LinuxAcademyIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?linuxacademy\.com/cp/
+ (?:
+ courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)|
+ modules/view/id/(?P<course_id>\d+)
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675',
+ 'info_dict': {
+ 'id': '7971-2',
+ 'ext': 'mp4',
+ 'title': 'What Is Data Science',
+ 'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
+ 'timestamp': 1607387907,
+ 'upload_date': '20201208',
+ 'duration': 304,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires Linux Academy account credentials',
+ }, {
+ 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://linuxacademy.com/cp/modules/view/id/154',
+ 'info_dict': {
+ 'id': '154',
+ 'title': 'AWS Certified Cloud Practitioner',
+ 'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c',
+ 'duration': 28835,
+ },
+ 'playlist_count': 41,
+ 'skip': 'Requires Linux Academy account credentials',
+ }]
+
+ _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
+ _ORIGIN_URL = 'https://linuxacademy.com'
+ _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
+ _NETRC_MACHINE = 'linuxacademy'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ def random_string():
+ return ''.join([
+ random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~')
+ for _ in range(32)])
+
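+        # random_string() supplies the OAuth 'state' and 'nonce' below;
+        # the overall flow is: GET /authorize -> POST
+        # /usernamepassword/login -> POST /login/callback, after which
+        # the access token is read from the redirect URL (or, failing
+        # that, from the authorizationResponse JS object).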
+ webpage, urlh = self._download_webpage_handle(
+ self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
+ 'client_id': self._CLIENT_ID,
+ 'response_type': 'token id_token',
+ 'response_mode': 'web_message',
+ 'redirect_uri': self._ORIGIN_URL,
+ 'scope': 'openid email user_impersonation profile',
+ 'audience': self._ORIGIN_URL,
+ 'state': random_string(),
+ 'nonce': random_string(),
+ })
+
+ login_data = self._parse_json(
+ self._search_regex(
+ r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+ 'login info', group='value'), None,
+ transform_source=lambda x: compat_b64decode(x).decode('utf-8')
+ )['extraParams']
+
+ login_data.update({
+ 'client_id': self._CLIENT_ID,
+ 'redirect_uri': self._ORIGIN_URL,
+ 'tenant': 'lacausers',
+ 'connection': 'Username-Password-Authentication',
+ 'username': username,
+ 'password': password,
+ 'sso': 'true',
+ })
+
+ login_state_url = urlh.geturl()
+
+ try:
+ login_page = self._download_webpage(
+ 'https://login.linuxacademy.com/usernamepassword/login', None,
+ 'Downloading login page', data=json.dumps(login_data).encode(),
+ headers={
+ 'Content-Type': 'application/json',
+ 'Origin': 'https://login.linuxacademy.com',
+ 'Referer': login_state_url,
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read(), None)
+ message = error.get('description') or error['code']
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, message), expected=True)
+ raise
+
+ callback_page, urlh = self._download_webpage_handle(
+ 'https://login.linuxacademy.com/login/callback', None,
+ 'Downloading callback page',
+ data=urlencode_postdata(self._hidden_inputs(login_page)),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Origin': 'https://login.linuxacademy.com',
+ 'Referer': login_state_url,
+ })
+
+ access_token = self._search_regex(
+ r'access_token=([^=&]+)', urlh.geturl(),
+ 'access token', default=None)
+ if not access_token:
+ access_token = self._parse_json(
+ self._search_regex(
+ r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page,
+ 'authorization response'), None,
+ transform_source=js_to_json)['response']['access_token']
+
+ self._download_webpage(
+ 'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
+ % access_token, None, 'Downloading token validation page')
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id')
+ item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id)
+
+ webpage = self._download_webpage(url, item_id)
+
+ # course path
+ if course_id:
+ module = self._parse_json(
+ self._search_regex(
+ r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'),
+ item_id)
+ entries = []
+ chapter_number = None
+ chapter = None
+ chapter_id = None
+ for item in module['items']:
+ if not isinstance(item, dict):
+ continue
+
+ def type_field(key):
+ return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower()
+ type_fields = (type_field('name'), type_field('slug'))
+ # Move to next module section
+ if 'section' in type_fields:
+ chapter = item.get('course_name')
+ chapter_id = item.get('course_module')
+ chapter_number = 1 if not chapter_number else chapter_number + 1
+ continue
+ # Skip non-lessons
+ if 'lesson' not in type_fields:
+ continue
+ lesson_url = urljoin(url, item.get('url'))
+ if not lesson_url:
+ continue
+ title = item.get('title') or item.get('lesson_name')
+ description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text'))
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': lesson_url,
+ 'ie_key': LinuxAcademyIE.ie_key(),
+ 'title': title,
+ 'description': description,
+ 'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')),
+ 'duration': parse_duration(item.get('duration')),
+ 'chapter': chapter,
+ 'chapter_id': chapter_id,
+ 'chapter_number': chapter_number,
+ })
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'id': course_id,
+ 'title': module.get('title'),
+ 'description': module.get('md_desc') or clean_html(module.get('desc')),
+ 'duration': parse_duration(module.get('duration')),
+ }
+
+ # single video path
+ m3u8_url = self._parse_json(
+ self._search_regex(
+ r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'),
+ item_id)[0]['file']
+ formats = self._extract_m3u8_formats(
+ m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ self._sort_formats(formats)
+ info = {
+ 'id': item_id,
+ 'formats': formats,
+ }
+ lesson = self._parse_json(
+ self._search_regex(
+ (r'window\.lesson\s*=\s*({.+?})\s*;',
+ r'player\.lesson\s*=\s*({.+?})\s*;'),
+ webpage, 'lesson', default='{}'), item_id, fatal=False)
+ if lesson:
+ info.update({
+ 'title': lesson.get('lesson_name'),
+ 'description': lesson.get('md_desc') or clean_html(lesson.get('desc')),
+ 'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')),
+ 'duration': parse_duration(lesson.get('duration')),
+ })
+ if not info.get('title'):
+ info['title'] = self._search_regex(
+ (r'>Lecture\s*:\s*(?P<value>[^<]+)',
+ r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
+ 'title', group='value')
+ return info
diff --git a/hypervideo_dl/extractor/litv.py b/hypervideo_dl/extractor/litv.py
new file mode 100644
index 0000000..337b1b1
--- /dev/null
+++ b/hypervideo_dl/extractor/litv.py
@@ -0,0 +1,148 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ smuggle_url,
+ unsmuggle_url,
+)
+
+
+class LiTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
+
+ _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s'
+
+ _TESTS = [{
+ 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+ 'info_dict': {
+ 'id': 'VOD00041606',
+ 'title': '花千骨',
+ },
+ 'playlist_count': 50,
+ }, {
+ 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+ 'md5': '969e343d9244778cb29acec608e53640',
+ 'info_dict': {
+ 'id': 'VOD00041610',
+ 'ext': 'mp4',
+ 'title': '花千骨第1集',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f',
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'noplaylist': True,
+ },
+ 'skip': 'Georestricted to Taiwan',
+ }, {
+ 'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&',
+ 'md5': '88322ea132f848d6e3e18b32a832b918',
+ 'info_dict': {
+ 'id': 'VOD00044841',
+ 'ext': 'mp4',
+ 'title': '芈月傳第1集 霸星芈月降世楚國',
+ 'description': '楚威王二年,太史令唐昧夜觀星象,發現霸星即將現世。王后得知霸星的預言後,想盡辦法不讓孩子順利出生,幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主,楚威王對此失望至極。楚王后命人將女嬰丟棄河中,居然奇蹟似的被少司命像攔下,楚威王認為此女非同凡響,為她取名芈月。',
+ },
+ 'skip': 'Georestricted to Taiwan',
+ }]
+
+ def _extract_playlist(self, season_list, video_id, program_info, prompt=True):
+ episode_title = program_info['title']
+ content_id = season_list['contentId']
+
+ if prompt:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (content_id, video_id))
+
+ all_episodes = [
+ self.url_result(smuggle_url(
+ self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']),
+ {'force_noplaylist': True})) # To prevent infinite recursion
+ for episode in season_list['episode']]
+
+ return self.playlist_result(all_episodes, content_id, episode_title)
+
+ def _real_extract(self, url):
+ url, data = unsmuggle_url(url, {})
+
+ video_id = self._match_id(url)
+
+ noplaylist = self._downloader.params.get('noplaylist')
+ noplaylist_prompt = True
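+ # Episode URLs emitted by _extract_playlist are smuggled with
+ # force_noplaylist so that recursing into them never re-expands the
+ # season list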
+ if 'force_noplaylist' in data:
+ noplaylist = data['force_noplaylist']
+ noplaylist_prompt = False
+
+ webpage = self._download_webpage(url, video_id)
+
+ program_info = self._parse_json(self._search_regex(
+ r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
+ video_id)
+
+ season_list = list(program_info.get('seasonList', {}).values())
+ if season_list:
+ if not noplaylist:
+ return self._extract_playlist(
+ season_list[0], video_id, program_info,
+ prompt=noplaylist_prompt)
+
+ if noplaylist_prompt:
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
+ # In browsers the `getMainUrl` request is always issued. It usually
+ # returns the same data as that embedded in the webpage. For
+ # georestricted videos there is no embedded data, so an extra request
+ # is necessary to obtain the error code
+ if 'assetId' not in program_info:
+ program_info = self._download_json(
+ 'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
+ query={'contentId': video_id},
+ headers={'Accept': 'application/json'})
+ video_data = self._parse_json(self._search_regex(
+ r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
+ webpage, 'video data', default='{}'), video_id)
+ if not video_data:
+ payload = {
+ 'assetId': program_info['assetId'],
+ 'watchDevices': program_info['watchDevices'],
+ 'contentType': program_info['contentType'],
+ }
+ video_data = self._download_json(
+ 'https://www.litv.tv/vod/getMainUrl', video_id,
+ data=json.dumps(payload).encode('utf-8'),
+ headers={'Content-Type': 'application/json'})
+
+ if not video_data.get('fullpath'):
+ error_msg = video_data.get('errorMessage')
+ if error_msg == 'vod.error.outsideregionerror':
+ self.raise_geo_restricted('This video is available in Taiwan only')
+ if error_msg:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_msg), expected=True)
+ raise ExtractorError('Unexpected result from %s' % self.IE_NAME)
+
+ formats = self._extract_m3u8_formats(
+ video_data['fullpath'], video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+ for a_format in formats:
+ # LiTV HLS segments don't play well with compression; this internal
+ # header tells the downloader not to request it
+ a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True
+
+ title = program_info['title'] + program_info.get('secondaryMark', '')
+ description = program_info.get('description')
+ thumbnail = program_info.get('imageFile')
+ categories = [item['name'] for item in program_info.get('category', [])]
+ episode = int_or_none(program_info.get('episode'))
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'categories': categories,
+ 'episode_number': episode,
+ }
diff --git a/hypervideo_dl/extractor/livejournal.py b/hypervideo_dl/extractor/livejournal.py
new file mode 100644
index 0000000..3a9f455
--- /dev/null
+++ b/hypervideo_dl/extractor/livejournal.py
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class LiveJournalIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272',
+ 'md5': 'adaf018388572ced8a6f301ace49d4b2',
+ 'info_dict': {
+ 'id': '1263729',
+ 'ext': 'mp4',
+ 'title': 'Истребители против БПЛА',
+ 'upload_date': '20190624',
+ 'timestamp': 1561406715,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ record = self._parse_json(self._search_regex(
+ r'Site\.page\s*=\s*({.+?});', webpage,
+ 'page data'), video_id)['video']['record']
+ storage_id = compat_str(record['storageid'])
+ title = record.get('name')
+ if title:
+ # remove the filename extension (.mp4, .mov, etc.)
+ title = title.rsplit('.', 1)[0]
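+ # Playback is hosted on EaglePlatform; hand off to that extractor while
+ # keeping the metadata gathered here via url_transparent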
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': record.get('thumbnail'),
+ 'timestamp': int_or_none(record.get('timecreate')),
+ 'url': 'eagleplatform:vc.videos.livejournal.com:' + storage_id,
+ 'ie_key': 'EaglePlatform',
+ }
diff --git a/hypervideo_dl/extractor/liveleak.py b/hypervideo_dl/extractor/liveleak.py
new file mode 100644
index 0000000..4ac437c
--- /dev/null
+++ b/hypervideo_dl/extractor/liveleak.py
@@ -0,0 +1,191 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class LiveLeakIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
+ _TESTS = [{
+ 'url': 'http://www.liveleak.com/view?i=757_1364311680',
+ 'md5': '0813c2430bea7a46bf13acf3406992f4',
+ 'info_dict': {
+ 'id': '757_1364311680',
+ 'ext': 'mp4',
+ 'description': 'extremely bad day for this guy..!',
+ 'uploader': 'ljfriel2',
+ 'title': 'Most unlucky car accident',
+ 'thumbnail': r're:^https?://.*\.jpg$'
+ }
+ }, {
+ 'url': 'http://www.liveleak.com/view?i=f93_1390833151',
+ 'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
+ 'info_dict': {
+ 'id': 'f93_1390833151',
+ 'ext': 'mp4',
+ 'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
+ 'uploader': 'ARD_Stinkt',
+ 'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
+ 'thumbnail': r're:^https?://.*\.jpg$'
+ }
+ }, {
+ # Prochan embed
+ 'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
+ 'md5': '42c6d97d54f1db107958760788c5f48f',
+ 'info_dict': {
+ 'id': '4f7_1392687779',
+ 'ext': 'mp4',
+ 'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
+ 'uploader': 'CapObveus',
+ 'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
+ 'age_limit': 18,
+ },
+ 'skip': 'Video is dead',
+ }, {
+ # Covers https://github.com/ytdl-org/youtube-dl/pull/5983
+ # Multiple resolutions
+ 'url': 'http://www.liveleak.com/view?i=801_1409392012',
+ 'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
+ 'info_dict': {
+ 'id': '801_1409392012',
+ 'ext': 'mp4',
+ 'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
+ 'uploader': 'bony333',
+ 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
+ 'thumbnail': r're:^https?://.*\.jpg$'
+ }
+ }, {
+ # Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
+ 'url': 'http://m.liveleak.com/view?i=763_1473349649',
+ 'add_ie': ['Youtube'],
+ 'info_dict': {
+ 'id': '763_1473349649',
+ 'ext': 'mp4',
+ 'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
+ 'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
+ 'uploader': 'Ziz',
+ 'upload_date': '20160908',
+ 'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.liveleak.com/view?i=677_1439397581',
+ 'info_dict': {
+ 'id': '677_1439397581',
+ 'title': 'Fuel Depot in China Explosion caught on video',
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
+ 'only_matching': True,
+ }, {
+ # No original video
+ 'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
+ video_description = self._og_search_description(webpage)
+ video_uploader = self._html_search_regex(
+ r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
+ age_limit = int_or_none(self._search_regex(
+ r'you confirm that you are ([0-9]+) years and over.',
+ webpage, 'age limit', default=None))
+ video_thumbnail = self._og_search_thumbnail(webpage)
+
+ entries = self._parse_html5_media_entries(url, webpage, video_id)
+ if not entries:
+ # Maybe an embed?
+ embed_url = self._search_regex(
+ r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
+ webpage, 'embed URL')
+ return {
+ '_type': 'url_transparent',
+ 'url': embed_url,
+ 'id': video_id,
+ 'title': video_title,
+ 'description': video_description,
+ 'uploader': video_uploader,
+ 'age_limit': age_limit,
+ }
+
+ for idx, info_dict in enumerate(entries):
+ formats = []
+ for a_format in info_dict['formats']:
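+ # HTML5 entries don't advertise heights; recover them from the
+ # <height>p.mp4 suffix of the URL when possible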
+ if not a_format.get('height'):
+ a_format['height'] = int_or_none(self._search_regex(
+ r'([0-9]+)p\.mp4', a_format['url'], 'height label',
+ default=None))
+ formats.append(a_format)
+
+ # Removing '.*.mp4' gives the raw video, which is essentially
+ # the same video without the LiveLeak logo at the top (see
+ # https://github.com/ytdl-org/youtube-dl/pull/4768)
+ orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
+ if a_format['url'] != orig_url:
+ format_id = a_format.get('format_id')
+ format_id = 'original' + ('-' + format_id if format_id else '')
+ if self._is_valid_url(orig_url, video_id, format_id):
+ formats.append({
+ 'format_id': format_id,
+ 'url': orig_url,
+ 'preference': 1,
+ })
+ self._sort_formats(formats)
+ info_dict['formats'] = formats
+
+ # Don't append entry ID for one-video pages to keep backward compatibility
+ if len(entries) > 1:
+ info_dict['id'] = '%s_%s' % (video_id, idx + 1)
+ else:
+ info_dict['id'] = video_id
+
+ info_dict.update({
+ 'title': video_title,
+ 'description': video_description,
+ 'uploader': video_uploader,
+ 'age_limit': age_limit,
+ 'thumbnail': video_thumbnail,
+ })
+
+ return self.playlist_result(entries, video_id, video_title)
+
+
+class LiveLeakEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
+
+ # See generic.py for actual test cases
+ _TESTS = [{
+ 'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ kind, video_id = re.match(self._VALID_URL, url).groups()
+
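+ # f= embeds only reference the canonical LiveLeak URL inside the page,
+ # while i=/t= embeds map directly onto view URLs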
+ if kind == 'f':
+ webpage = self._download_webpage(url, video_id)
+ liveleak_url = self._search_regex(
+ r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
+ webpage, 'LiveLeak URL', group='url')
+ else:
+ liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
+
+ return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
diff --git a/hypervideo_dl/extractor/livestream.py b/hypervideo_dl/extractor/livestream.py
new file mode 100644
index 0000000..e55b1a2
--- /dev/null
+++ b/hypervideo_dl/extractor/livestream.py
@@ -0,0 +1,366 @@
+from __future__ import unicode_literals
+
+import re
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ find_xpath_attr,
+ xpath_attr,
+ xpath_with_ns,
+ xpath_text,
+ orderedSet,
+ update_url_query,
+ int_or_none,
+ float_or_none,
+ parse_iso8601,
+ determine_ext,
+)
+
+
+class LivestreamIE(InfoExtractor):
+ IE_NAME = 'livestream'
+ _VALID_URL = r'https?://(?:new\.)?livestream\.com/(?:accounts/(?P<account_id>\d+)|(?P<account_name>[^/]+))/(?:events/(?P<event_id>\d+)|(?P<event_name>[^/]+))(?:/videos/(?P<id>\d+))?'
+ _TESTS = [{
+ 'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
+ 'md5': '53274c76ba7754fb0e8d072716f2292b',
+ 'info_dict': {
+ 'id': '4719370',
+ 'ext': 'mp4',
+ 'title': 'Live from Webster Hall NYC',
+ 'timestamp': 1350008072,
+ 'upload_date': '20121012',
+ 'duration': 5968.0,
+ 'like_count': int,
+ 'view_count': int,
+ 'thumbnail': r're:^http://.*\.jpg$'
+ }
+ }, {
+ 'url': 'http://new.livestream.com/tedx/cityenglish',
+ 'info_dict': {
+ 'title': 'TEDCity2.0 (English)',
+ 'id': '2245590',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ 'url': 'http://new.livestream.com/chess24/tatasteelchess',
+ 'info_dict': {
+ 'title': 'Tata Steel Chess',
+ 'id': '3705884',
+ },
+ 'playlist_mincount': 60,
+ }, {
+ 'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015',
+ 'only_matching': True,
+ }]
+ _API_URL_TEMPLATE = 'http://livestream.com/api/accounts/%s/events/%s'
+
+ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
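+ # Livestream SMIL manifests advertise their download host in an
+ # httpBase <meta>; fall back to the Akamai VOD host when it is absent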
+ base_ele = find_xpath_attr(
+ smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase')
+ base = base_ele.get('content') if base_ele is not None else 'http://livestreamvod-f.akamaihd.net/'
+
+ formats = []
+ video_nodes = smil.findall(self._xpath_ns('.//video', namespace))
+
+ for vn in video_nodes:
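+ # system-bitrate is given in bit/s; the scale argument of int_or_none
+ # converts it to kbit/s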
+ tbr = int_or_none(vn.attrib.get('system-bitrate'), 1000)
+ furl = (
+ update_url_query(compat_urlparse.urljoin(base, vn.attrib['src']), {
+ 'v': '3.0.3',
+ 'fp': 'WIN% 14,0,0,145',
+ }))
+ if 'clipBegin' in vn.attrib:
+ furl += '&ssek=' + vn.attrib['clipBegin']
+ formats.append({
+ 'url': furl,
+ 'format_id': 'smil_%d' % tbr,
+ 'ext': 'flv',
+ 'tbr': tbr,
+ 'preference': -1000,
+ })
+ return formats
+
+ def _extract_video_info(self, video_data):
+ video_id = compat_str(video_data['id'])
+
+ FORMAT_KEYS = (
+ ('sd', 'progressive_url'),
+ ('hd', 'progressive_url_hd'),
+ )
+
+ formats = []
+ for format_id, key in FORMAT_KEYS:
+ video_url = video_data.get(key)
+ if video_url:
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ continue
+ bitrate = int_or_none(self._search_regex(
+ r'(\d+)\.%s' % ext, video_url, 'bitrate', default=None))
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'tbr': bitrate,
+ 'ext': ext,
+ })
+
+ smil_url = video_data.get('smil_url')
+ if smil_url:
+ formats.extend(self._extract_smil_formats(smil_url, video_id, fatal=False))
+
+ m3u8_url = video_data.get('m3u8_url')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ f4m_url = video_data.get('f4m_url')
+ if f4m_url:
+ formats.extend(self._extract_f4m_formats(
+ f4m_url, video_id, f4m_id='hds', fatal=False))
+ self._sort_formats(formats)
+
+ comments = [{
+ 'author_id': comment.get('author_id'),
+ 'author': comment.get('author', {}).get('full_name'),
+ 'id': comment.get('id'),
+ 'text': comment['text'],
+ 'timestamp': parse_iso8601(comment.get('created_at')),
+ } for comment in video_data.get('comments', {}).get('data', [])]
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': video_data['caption'],
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnail_url'),
+ 'duration': float_or_none(video_data.get('duration'), 1000),
+ 'timestamp': parse_iso8601(video_data.get('publish_at')),
+ 'like_count': video_data.get('likes', {}).get('total'),
+ 'comment_count': video_data.get('comments', {}).get('total'),
+ 'view_count': video_data.get('views'),
+ 'comments': comments,
+ }
+
+ def _extract_stream_info(self, stream_info):
+ broadcast_id = compat_str(stream_info['broadcast_id'])
+ is_live = stream_info.get('is_live')
+
+ formats = []
+ smil_url = stream_info.get('play_url')
+ if smil_url:
+ formats.extend(self._extract_smil_formats(smil_url, broadcast_id))
+
+ m3u8_url = stream_info.get('m3u8_url')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, broadcast_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ rtsp_url = stream_info.get('rtsp_url')
+ if rtsp_url:
+ formats.append({
+ 'url': rtsp_url,
+ 'format_id': 'rtsp',
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': broadcast_id,
+ 'formats': formats,
+ 'title': self._live_title(stream_info['stream_title']) if is_live else stream_info['stream_title'],
+ 'thumbnail': stream_info.get('thumbnail_url'),
+ 'is_live': is_live,
+ }
+
+ def _extract_event(self, event_data):
+ event_id = compat_str(event_data['id'])
+ account_id = compat_str(event_data['owner_account_id'])
+ feed_root_url = self._API_URL_TEMPLATE % (account_id, event_id) + '/feed.json'
+
+ stream_info = event_data.get('stream_info')
+ if stream_info:
+ return self._extract_stream_info(stream_info)
+
+ last_video = None
+ entries = []
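+ # Page through the event feed: each request is keyed off the last video
+ # id seen, and an empty page marks the end of the feed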
+ for i in itertools.count(1):
+ if last_video is None:
+ info_url = feed_root_url
+ else:
+ info_url = '{root}?&id={id}&newer=-1&type=video'.format(
+ root=feed_root_url, id=last_video)
+ videos_info = self._download_json(
+ info_url, event_id, 'Downloading page {0}'.format(i))['data']
+ videos_info = [v['data'] for v in videos_info if v['type'] == 'video']
+ if not videos_info:
+ break
+ for v in videos_info:
+ v_id = compat_str(v['id'])
+ entries.append(self.url_result(
+ 'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v_id),
+ 'Livestream', v_id, v.get('caption')))
+ last_video = videos_info[-1]['id']
+ return self.playlist_result(entries, event_id, event_data['full_name'])
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ event = mobj.group('event_id') or mobj.group('event_name')
+ account = mobj.group('account_id') or mobj.group('account_name')
+ api_url = self._API_URL_TEMPLATE % (account, event)
+ if video_id:
+ video_data = self._download_json(
+ api_url + '/videos/%s' % video_id, video_id)
+ return self._extract_video_info(video_data)
+ else:
+ event_data = self._download_json(api_url, video_id)
+ return self._extract_event(event_data)
+
+
+# The original version of Livestream uses a different system
+class LivestreamOriginalIE(InfoExtractor):
+ IE_NAME = 'livestream:original'
+ _VALID_URL = r'''(?x)https?://original\.livestream\.com/
+ (?P<user>[^/\?#]+)(?:/(?P<type>video|folder)
+ (?:(?:\?.*?Id=|/)(?P<id>.*?)(&|$))?)?
+ '''
+ _TESTS = [{
+ 'url': 'http://original.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
+ 'info_dict': {
+ 'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
+ 'ext': 'mp4',
+ 'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
+ 'duration': 771.301,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://original.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+ 'info_dict': {
+ 'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ # live stream
+ 'url': 'http://original.livestream.com/znsbahamas',
+ 'only_matching': True,
+ }]
+
+ def _extract_video_info(self, user, video_id):
+ api_url = 'http://x%sx.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id=%s' % (user, video_id)
+ info = self._download_xml(api_url, video_id)
+
+ item = info.find('channel').find('item')
+ title = xpath_text(item, 'title')
+ media_ns = {'media': 'http://search.yahoo.com/mrss'}
+ thumbnail_url = xpath_attr(
+ item, xpath_with_ns('media:thumbnail', media_ns), 'url')
+ duration = float_or_none(xpath_attr(
+ item, xpath_with_ns('media:content', media_ns), 'duration'))
+ ls_ns = {'ls': 'http://api.channel.livestream.com/2.0'}
+ view_count = int_or_none(xpath_text(
+ item, xpath_with_ns('ls:viewsCount', ls_ns)))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail_url,
+ 'duration': duration,
+ 'view_count': view_count,
+ }
+
+ def _extract_video_formats(self, video_data, video_id):
+ formats = []
+
+ progressive_url = video_data.get('progressiveUrl')
+ if progressive_url:
+ formats.append({
+ 'url': progressive_url,
+ 'format_id': 'http',
+ })
+
+ m3u8_url = video_data.get('httpUrl')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ rtsp_url = video_data.get('rtspUrl')
+ if rtsp_url:
+ formats.append({
+ 'url': rtsp_url,
+ 'format_id': 'rtsp',
+ })
+
+ self._sort_formats(formats)
+ return formats
+
+ def _extract_folder(self, url, folder_id):
+ webpage = self._download_webpage(url, folder_id)
+ paths = orderedSet(re.findall(
+ r'''(?x)(?:
+ <li\s+class="folder">\s*<a\s+href="|
+ <a\s+href="(?=https?://livestre\.am/)
+ )([^"]+)"''', webpage))
+
+ entries = [{
+ '_type': 'url',
+ 'url': compat_urlparse.urljoin(url, p),
+ } for p in paths]
+
+ return self.playlist_result(entries, folder_id)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ user = mobj.group('user')
+ url_type = mobj.group('type')
+ content_id = mobj.group('id')
+ if url_type == 'folder':
+ return self._extract_folder(url, content_id)
+ else:
+ # this url is used on mobile devices
+ stream_url = 'http://x%sx.api.channel.livestream.com/3.0/getstream.json' % user
+ info = {}
+ if content_id:
+ stream_url += '?id=%s' % content_id
+ info = self._extract_video_info(user, content_id)
+ else:
+ content_id = user
+ webpage = self._download_webpage(url, content_id)
+ info = {
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
+ }
+ video_data = self._download_json(stream_url, content_id)
+ is_live = video_data.get('isLive')
+ info.update({
+ 'id': content_id,
+ 'title': self._live_title(info['title']) if is_live else info['title'],
+ 'formats': self._extract_video_formats(video_data, content_id),
+ 'is_live': is_live,
+ })
+ return info
+
+
+# The server doesn't support HEAD requests, so the generic extractor
+# can't detect the redirection
+class LivestreamShortenerIE(InfoExtractor):
+ IE_NAME = 'livestream:shortener'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://livestre\.am/(?P<id>.+)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ webpage = self._download_webpage(url, video_id)
+
+ return self.url_result(self._og_search_url(webpage))
diff --git a/hypervideo_dl/extractor/lnkgo.py b/hypervideo_dl/extractor/lnkgo.py
new file mode 100644
index 0000000..3e71852
--- /dev/null
+++ b/hypervideo_dl/extractor/lnkgo.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ compat_str,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class LnkGoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
+ _TESTS = [{
+ 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
+ 'info_dict': {
+ 'id': '10809',
+ 'ext': 'mp4',
+ 'title': "Put'ka: Trys Klausimai",
+ 'upload_date': '20161216',
+ 'description': 'Seniai matytas Put’ka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
+ 'age_limit': 18,
+ 'duration': 117,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1481904000,
+ },
+ 'params': {
+ 'skip_download': True, # HLS download
+ },
+ }, {
+ 'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
+ 'info_dict': {
+ 'id': '10467',
+ 'ext': 'mp4',
+ 'title': 'Nėrdas: Kompiuterio Valymas',
+ 'upload_date': '20150113',
+ 'description': 'md5:7352d113a242a808676ff17e69db6a69',
+ 'age_limit': 18,
+ 'duration': 346,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1421164800,
+ },
+ 'params': {
+ 'skip_download': True, # HLS download
+ },
+ }, {
+ 'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
+ 'only_matching': True,
+ }]
+ _AGE_LIMITS = {
+ 'N-7': 7,
+ 'N-14': 14,
+ 'S': 18,
+ }
+ _M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'
+
+ def _real_extract(self, url):
+ display_id, video_id = re.match(self._VALID_URL, url).groups()
+
+ video_info = self._download_json(
+ 'https://lnk.lt/api/main/video-page/%s/%s/false' % (display_id, video_id or '0'),
+ display_id)['videoConfig']['videoInfo']
+
+ video_id = compat_str(video_info['id'])
+ title = video_info['title']
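+ # Quality-switchable items are published under a smil: path on the VOD
+ # host, plain items under mp4: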
+ prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4'
+ formats = self._extract_m3u8_formats(
+ self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''),
+ video_id, 'mp4', 'm3u8_native')
+ self._sort_formats(formats)
+
+ poster_image = video_info.get('posterImage')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': 'https://lnk.lt/all-images/' + poster_image if poster_image else None,
+ 'duration': int_or_none(video_info.get('duration')),
+ 'description': clean_html(video_info.get('htmlDescription')),
+ 'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
+ 'timestamp': parse_iso8601(video_info.get('airDate')),
+ 'view_count': int_or_none(video_info.get('viewsCount')),
+ }
diff --git a/hypervideo_dl/extractor/localnews8.py b/hypervideo_dl/extractor/localnews8.py
new file mode 100644
index 0000000..aad3961
--- /dev/null
+++ b/hypervideo_dl/extractor/localnews8.py
@@ -0,0 +1,47 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class LocalNews8IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304',
+ 'md5': 'be4d48aea61aa2bde7be2ee47691ad20',
+ 'info_dict': {
+ 'id': '35183304',
+ 'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings',
+ 'ext': 'mp4',
+ 'title': 'Rexburg business turns carbon fiber scraps into wedding ring',
+ 'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.',
+ 'duration': 153,
+ 'timestamp': 1441844822,
+ 'upload_date': '20150910',
+ 'uploader_id': 'api',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ partner_id = self._search_regex(
+ r'partnerId\s*[:=]\s*(["\'])(?P<id>\d+)\1',
+ webpage, 'partner id', group='id')
+ kaltura_id = self._search_regex(
+ r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P<id>[0-9a-z_]+)\1',
+ webpage, 'video id', group='id')
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
+ 'ie_key': 'Kaltura',
+ 'id': video_id,
+ 'display_id': display_id,
+ }
diff --git a/hypervideo_dl/extractor/lovehomeporn.py b/hypervideo_dl/extractor/lovehomeporn.py
new file mode 100644
index 0000000..8f65a3c
--- /dev/null
+++ b/hypervideo_dl/extractor/lovehomeporn.py
@@ -0,0 +1,37 @@
+from __future__ import unicode_literals
+
+import re
+
+from .nuevo import NuevoBaseIE
+
+
+class LoveHomePornIE(NuevoBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?lovehomeporn\.com/video/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _TEST = {
+ 'url': 'http://lovehomeporn.com/video/48483/stunning-busty-brunette-girlfriend-sucking-and-riding-a-big-dick#menu',
+ 'info_dict': {
+ 'id': '48483',
+ 'display_id': 'stunning-busty-brunette-girlfriend-sucking-and-riding-a-big-dick',
+ 'ext': 'mp4',
+ 'title': 'Stunning busty brunette girlfriend sucking and riding a big dick',
+ 'age_limit': 18,
+ 'duration': 238.47,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ info = self._extract_nuevo(
+ 'http://lovehomeporn.com/media/nuevo/config.php?key=%s' % video_id,
+ video_id)
+ info.update({
+ 'display_id': display_id,
+ 'age_limit': 18
+ })
+ return info
diff --git a/hypervideo_dl/extractor/lrt.py b/hypervideo_dl/extractor/lrt.py
new file mode 100644
index 0000000..89d5498
--- /dev/null
+++ b/hypervideo_dl/extractor/lrt.py
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ merge_dicts,
+)
+
+
+class LRTIE(InfoExtractor):
+ IE_NAME = 'lrt.lt'
+ _VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
+ _TESTS = [{
+ # m3u8 download
+ 'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
+ 'md5': '85cb2bb530f31d91a9c65b479516ade4',
+ 'info_dict': {
+ 'id': '2000127261',
+ 'ext': 'mp4',
+ 'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė',
+ 'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa',
+ 'duration': 3035,
+ 'timestamp': 1604079000,
+ 'upload_date': '20201030',
+ },
+ }, {
+ # direct mp3 download
+ 'url': 'http://www.lrt.lt/mediateka/irasas/1013074524/',
+ 'md5': '389da8ca3cad0f51d12bed0c844f6a0a',
+ 'info_dict': {
+ 'id': '1013074524',
+ 'ext': 'mp3',
+ 'title': 'Kita tema 2016-09-05 15:05',
+ 'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5',
+ 'duration': 3008,
+ 'view_count': int,
+ 'like_count': int,
+ },
+ }]
+
+ def _extract_js_var(self, webpage, var_name, default):
+ return self._search_regex(
+ r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name,
+ webpage, var_name.replace('_', ' '), default, group=2)
+
+ def _real_extract(self, url):
+ path, video_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, video_id)
+
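+ # The page embeds main_url and media_info_url as JS vars; fall back to
+ # the canonical path and endpoint when they are absent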
+ media_url = self._extract_js_var(webpage, 'main_url', path)
+ media = self._download_json(self._extract_js_var(
+ webpage, 'media_info_url',
+ 'https://www.lrt.lt/servisai/stream_url/vod/media_info/'),
+ video_id, query={'url': media_url})
+ jw_data = self._parse_jwplayer_data(
+ media['playlist_item'], video_id, base_url=url)
+
+ json_ld_data = self._search_json_ld(webpage, video_id)
+
+ tags = []
+ for tag in (media.get('tags') or []):
+ tag_name = tag.get('name')
+ if not tag_name:
+ continue
+ tags.append(tag_name)
+
+ clean_info = {
+ 'description': clean_html(media.get('content')),
+ 'tags': tags,
+ }
+
+ return merge_dicts(clean_info, jw_data, json_ld_data)
diff --git a/hypervideo_dl/extractor/lynda.py b/hypervideo_dl/extractor/lynda.py
new file mode 100644
index 0000000..b3d8653
--- /dev/null
+++ b/hypervideo_dl/extractor/lynda.py
@@ -0,0 +1,341 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ urlencode_postdata,
+)
+
+
+class LyndaBaseIE(InfoExtractor):
+ _SIGNIN_URL = 'https://www.lynda.com/signin/lynda'
+ _PASSWORD_URL = 'https://www.lynda.com/signin/password'
+ _USER_URL = 'https://www.lynda.com/signin/user'
+ _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
+ _NETRC_MACHINE = 'lynda'
+
+ def _real_initialize(self):
+ self._login()
+
+ @staticmethod
+ def _check_error(json_string, key_or_keys):
+ keys = [key_or_keys] if isinstance(key_or_keys, compat_str) else key_or_keys
+ for key in keys:
+ error = json_string.get(key)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+
+ def _login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url):
+ action_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_html,
+ 'post url', default=fallback_action_url, group='url')
+
+ if not action_url.startswith('http'):
+ action_url = compat_urlparse.urljoin(self._SIGNIN_URL, action_url)
+
+ form_data = self._hidden_inputs(form_html)
+ form_data.update(extra_form_data)
+
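+ # lynda reports form errors with 418/500 responses whose bodies are
+ # still valid JSON; accept those statuses so _check_error can surface
+ # the message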
+ response = self._download_json(
+ action_url, None, note,
+ data=urlencode_postdata(form_data),
+ headers={
+ 'Referer': referrer_url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ }, expected_status=(418, 500, ))
+
+ self._check_error(response, ('email', 'password', 'ErrorMessage'))
+
+ return response, action_url
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ # Step 1: download signin page
+ signin_page = self._download_webpage(
+ self._SIGNIN_URL, None, 'Downloading signin page')
+
+ # Already logged in
+ if any(re.search(p, signin_page) for p in (
+ r'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
+ return
+
+ # Step 2: submit email
+ signin_form = self._search_regex(
+ r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)',
+ signin_page, 'signin form')
+ signin_page, signin_url = self._login_step(
+ signin_form, self._PASSWORD_URL, {'email': username},
+ 'Submitting email', self._SIGNIN_URL)
+
+ # Step 3: submit password
+ password_form = signin_page['body']
+ self._login_step(
+ password_form, self._USER_URL, {'email': username, 'password': password},
+ 'Submitting password', signin_url)
+
+
+class LyndaIE(LyndaBaseIE):
+ IE_NAME = 'lynda'
+ IE_DESC = 'lynda.com videos'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?(?:lynda\.com|educourse\.ga)/
+ (?:
+ (?:[^/]+/){2,3}(?P<course_id>\d+)|
+ player/embed
+ )/
+ (?P<id>\d+)
+ '''
+
+ _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
+
+ _TESTS = [{
+ 'url': 'https://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
+ # md5 is unstable
+ 'info_dict': {
+ 'id': '114408',
+ 'ext': 'mp4',
+ 'title': 'Using the exercise files',
+ 'duration': 68
+ }
+ }, {
+ 'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',
+ 'only_matching': True,
+ }, {
+ # Status="NotFound", Message="Transcript not found"
+ 'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html',
+ 'only_matching': True,
+ }]
+
+ def _raise_unavailable(self, video_id):
+ self.raise_login_required(
+ 'Video %s is only available for members' % video_id)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ course_id = mobj.group('course_id')
+
+ query = {
+ 'videoId': video_id,
+ 'type': 'video',
+ }
+
+ video = self._download_json(
+ 'https://www.lynda.com/ajax/player', video_id,
+ 'Downloading video JSON', fatal=False, query=query)
+
+ # Fallback scenario
+ if not video:
+ query['courseId'] = course_id
+
+ play = self._download_json(
+ 'https://www.lynda.com/ajax/course/%s/%s/play'
+ % (course_id, video_id), video_id, 'Downloading play JSON')
+
+ if not play:
+ self._raise_unavailable(video_id)
+
+ formats = []
+ for formats_dict in play:
+ urls = formats_dict.get('urls')
+ if not isinstance(urls, dict):
+ continue
+ cdn = formats_dict.get('name')
+ for format_id, format_url in urls.items():
+ if not format_url:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'format_id': '%s-%s' % (cdn, format_id) if cdn else format_id,
+ 'height': int_or_none(format_id),
+ })
+ self._sort_formats(formats)
+
+ conviva = self._download_json(
+ 'https://www.lynda.com/ajax/player/conviva', video_id,
+ 'Downloading conviva JSON', query=query)
+
+ return {
+ 'id': video_id,
+ 'title': conviva['VideoTitle'],
+ 'description': conviva.get('VideoDescription'),
+ 'release_year': int_or_none(conviva.get('ReleaseYear')),
+ 'duration': int_or_none(conviva.get('Duration')),
+ 'creator': conviva.get('Author'),
+ 'formats': formats,
+ }
+
+ if 'Status' in video:
+ raise ExtractorError(
+ 'lynda returned error: %s' % video['Message'], expected=True)
+
+ if video.get('HasAccess') is False:
+ self._raise_unavailable(video_id)
+
+ video_id = compat_str(video.get('ID') or video_id)
+ duration = int_or_none(video.get('DurationInSeconds'))
+ title = video['Title']
+
+ formats = []
+
+ fmts = video.get('Formats')
+ if fmts:
+ formats.extend([{
+ 'url': f['Url'],
+ 'ext': f.get('Extension'),
+ 'width': int_or_none(f.get('Width')),
+ 'height': int_or_none(f.get('Height')),
+ 'filesize': int_or_none(f.get('FileSize')),
+ 'format_id': compat_str(f.get('Resolution')) if f.get('Resolution') else None,
+ } for f in fmts if f.get('Url')])
+
+ prioritized_streams = video.get('PrioritizedStreams')
+ if prioritized_streams:
+ for prioritized_stream_id, prioritized_stream in prioritized_streams.items():
+ formats.extend([{
+ 'url': video_url,
+ 'height': int_or_none(format_id),
+ 'format_id': '%s-%s' % (prioritized_stream_id, format_id),
+ } for format_id, video_url in prioritized_stream.items()])
+
+ self._check_formats(formats, video_id)
+ self._sort_formats(formats)
+
+ subtitles = self.extract_subtitles(video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'duration': duration,
+ 'subtitles': subtitles,
+ 'formats': formats
+ }
+
+ def _fix_subtitles(self, subs):
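+ # Transcript entries carry only start timecodes; each cue ends where
+ # the next one begins, so the final entry serves only as an end marker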
+ srt = ''
+ seq_counter = 0
+ for pos in range(0, len(subs) - 1):
+ seq_current = subs[pos]
+ m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
+ if m_current is None:
+ continue
+ seq_next = subs[pos + 1]
+ m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
+ if m_next is None:
+ continue
+ appear_time = m_current.group('timecode')
+ disappear_time = m_next.group('timecode')
+ text = seq_current['Caption'].strip()
+ if text:
+ seq_counter += 1
+ srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (seq_counter, appear_time, disappear_time, text)
+ if srt:
+ return srt
+
+ def _get_subtitles(self, video_id):
+ url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
+ subs = self._download_webpage(
+ url, video_id, 'Downloading subtitles JSON', fatal=False)
+ if not subs or 'Status="NotFound"' in subs:
+ return {}
+ subs = self._parse_json(subs, video_id, fatal=False)
+ if not subs:
+ return {}
+ fixed_subs = self._fix_subtitles(subs)
+ if fixed_subs:
+ return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
+ return {}
+
+
+class LyndaCourseIE(LyndaBaseIE):
+ IE_NAME = 'lynda:course'
+ IE_DESC = 'lynda.com online courses'
+
+ # A course link is identical to the welcome/introduction video link of
+ # the same course, so recognize it as a course link
+ _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>(?:[^/]+/){2,3}(?P<courseid>\d+))-2\.html'
+
+ _TESTS = [{
+ 'url': 'https://www.lynda.com/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ course_path = mobj.group('coursepath')
+ course_id = mobj.group('courseid')
+
+ item_template = 'https://www.lynda.com/%s/%%s-4.html' % course_path
+
+ course = self._download_json(
+ 'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
+ course_id, 'Downloading course JSON', fatal=False)
+
+ if not course:
+ webpage = self._download_webpage(url, course_id)
+ entries = [
+ self.url_result(
+ item_template % video_id, ie=LyndaIE.ie_key(),
+ video_id=video_id)
+ for video_id in re.findall(
+ r'data-video-id=["\'](\d+)', webpage)]
+ return self.playlist_result(
+ entries, course_id,
+ self._og_search_title(webpage, fatal=False),
+ self._og_search_description(webpage))
+
+ if course.get('Status') == 'NotFound':
+ raise ExtractorError(
+ 'Course %s does not exist' % course_id, expected=True)
+
+ unaccessible_videos = 0
+ entries = []
+
+ # Videos could be extracted right here from video['Formats'], since the
+ # single-video API does not seem to provide 'Formats' anymore
+
+ for chapter in course['Chapters']:
+ for video in chapter.get('Videos', []):
+ if video.get('HasAccess') is False:
+ unaccessible_videos += 1
+ continue
+ video_id = video.get('ID')
+ if video_id:
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': item_template % video_id,
+ 'ie_key': LyndaIE.ie_key(),
+ 'chapter': chapter.get('Title'),
+ 'chapter_number': int_or_none(chapter.get('ChapterIndex')),
+ 'chapter_id': compat_str(chapter.get('ID')),
+ })
+
+ if unaccessible_videos > 0:
+ self._downloader.report_warning(
+ '%s videos are only available for members (or paid members) and will not be downloaded. '
+ % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
+
+ course_title = course.get('Title')
+ course_description = course.get('Description')
+
+ return self.playlist_result(entries, course_id, course_title, course_description)
diff --git a/hypervideo_dl/extractor/m6.py b/hypervideo_dl/extractor/m6.py
new file mode 100644
index 0000000..9806875
--- /dev/null
+++ b/hypervideo_dl/extractor/m6.py
@@ -0,0 +1,25 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class M6IE(InfoExtractor):
+ IE_NAME = 'm6'
+ _VALID_URL = r'https?://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'
+
+ _TEST = {
+ 'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html',
+ 'md5': '242994a87de2c316891428e0176bcb77',
+ 'info_dict': {
+ 'id': '11323908',
+ 'ext': 'mp4',
+ 'title': 'Emeline est la Reine du Shopping sur le thème « Ma fête d’anniversaire ! »',
+ 'description': 'md5:1212ae8fb4b7baa4dc3886c5676007c2',
+ 'duration': 100,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result('6play:%s' % video_id, 'SixPlay', video_id)
diff --git a/hypervideo_dl/extractor/mailru.py b/hypervideo_dl/extractor/mailru.py
new file mode 100644
index 0000000..65cc474
--- /dev/null
+++ b/hypervideo_dl/extractor/mailru.py
@@ -0,0 +1,329 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ remove_end,
+ try_get,
+)
+
+
+class MailRuIE(InfoExtractor):
+ IE_NAME = 'mailru'
+ IE_DESC = 'Видео@Mail.Ru'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|m)\.)?my\.mail\.ru/+
+ (?:
+ video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|
+ (?:(?P<idv2prefix>(?:[^/]+/+){2})video/(?P<idv2suffix>[^/]+/\d+))\.html|
+ (?:video/embed|\+/video/meta)/(?P<metaid>\d+)
+ )
+ '''
+ _TESTS = [
+ {
+ 'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
+ 'md5': 'dea205f03120046894db4ebb6159879a',
+ 'info_dict': {
+ 'id': '46301138_76',
+ 'ext': 'mp4',
+ 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
+ 'timestamp': 1393235077,
+ 'upload_date': '20140224',
+ 'uploader': 'sonypicturesrus',
+ 'uploader_id': 'sonypicturesrus@mail.ru',
+ 'duration': 184,
+ },
+ 'skip': 'Not accessible from Travis CI server',
+ },
+ {
+ 'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
+ 'md5': '00a91a58c3402204dcced523777b475f',
+ 'info_dict': {
+ 'id': '46843144_1263',
+ 'ext': 'mp4',
+ 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
+ 'timestamp': 1397039888,
+ 'upload_date': '20140409',
+ 'uploader': 'hitech',
+ 'uploader_id': 'hitech@corp.mail.ru',
+ 'duration': 245,
+ },
+ 'skip': 'Not accessible from Travis CI server',
+ },
+ {
+ # only available via metaUrl API
+ 'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html',
+ 'md5': '3b26d2491c6949d031a32b96bd97c096',
+ 'info_dict': {
+ 'id': '56664382_502',
+ 'ext': 'mp4',
+ 'title': ':8336',
+ 'timestamp': 1449094163,
+ 'upload_date': '20151202',
+ 'uploader': '720pizle@mail.ru',
+ 'uploader_id': '720pizle@mail.ru',
+ 'duration': 6001,
+ },
+ 'skip': 'Not accessible from Travis CI server',
+ },
+ {
+ 'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://my.mail.ru/video/embed/7949340477499637815',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://my.mail.ru/+/video/meta/7949340477499637815',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://my.mail.ru//list/sinyutin10/video/_myvideo/4.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ meta_id = mobj.group('metaid')
+
+ video_id = None
+ if meta_id:
+ meta_url = 'https://my.mail.ru/+/video/meta/%s' % meta_id
+ else:
+ video_id = mobj.group('idv1')
+ if not video_id:
+ video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
+ webpage = self._download_webpage(url, video_id)
+ page_config = self._parse_json(self._search_regex(
+ r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
+ webpage, 'page config', default='{}'), video_id, fatal=False)
+ if page_config:
+ meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
+ else:
+ meta_url = None
+
+ video_data = None
+ if meta_url:
+ video_data = self._download_json(
+ meta_url, video_id or meta_id, 'Downloading video meta JSON',
+ fatal=not video_id)
+
+ # Fall back to the old API approach
+ if not video_data:
+ video_data = self._download_json(
+ 'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
+ video_id, 'Downloading video JSON')
+
+ headers = {}
+
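+ # Formats are fetched by the downloader outside this session, so the
+ # video_key cookie has to be forwarded explicitly via http_headers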
+ video_key = self._get_cookies('https://my.mail.ru').get('video_key')
+ if video_key:
+ headers['Cookie'] = 'video_key=%s' % video_key.value
+
+ formats = []
+ for f in video_data['videos']:
+ video_url = f.get('url')
+ if not video_url:
+ continue
+ format_id = f.get('key')
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'height': height,
+ 'http_headers': headers,
+ })
+ self._sort_formats(formats)
+
+ meta_data = video_data['meta']
+ title = remove_end(meta_data['title'], '.mp4')
+
+ author = video_data.get('author') or {}
+ uploader = author.get('name')
+ uploader_id = author.get('id') or author.get('email')
+ view_count = int_or_none(video_data.get('viewsCount') or video_data.get('views_count'))
+
+ acc_id = meta_data.get('accId')
+ item_id = meta_data.get('itemId')
+ content_id = '%s_%s' % (acc_id, item_id) if acc_id and item_id else video_id
+
+ thumbnail = meta_data.get('poster')
+ duration = int_or_none(meta_data.get('duration'))
+ timestamp = int_or_none(meta_data.get('timestamp'))
+
+ return {
+ 'id': content_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
+
+
+class MailRuMusicSearchBaseIE(InfoExtractor):
+ def _search(self, query, url, audio_id, limit=100, offset=0):
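+ # Search goes through the generic my.mail.ru ajax dispatcher; the
+ # response is a list in which a single dict element carries the payload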
+ search = self._download_json(
+ 'https://my.mail.ru/cgi-bin/my/ajax', audio_id,
+ 'Downloading songs JSON page %d' % (offset // limit + 1),
+ headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ }, query={
+ 'xemail': '',
+ 'ajax_call': '1',
+ 'func_name': 'music.search',
+ 'mna': '',
+ 'mnb': '',
+ 'arg_query': query,
+ 'arg_extended': '1',
+ 'arg_search_params': json.dumps({
+ 'music': {
+ 'limit': limit,
+ 'offset': offset,
+ },
+ }),
+ 'arg_limit': limit,
+ 'arg_offset': offset,
+ })
+ return next(e for e in search if isinstance(e, dict))
+
+ @staticmethod
+ def _extract_track(t, fatal=True):
+ audio_url = t['URL'] if fatal else t.get('URL')
+ if not audio_url:
+ return
+
+ audio_id = t['File'] if fatal else t.get('File')
+ if not audio_id:
+ return
+
+ thumbnail = t.get('AlbumCoverURL') or t.get('FiledAlbumCover')
+ uploader = t.get('OwnerName') or t.get('OwnerName_Text_HTML')
+ uploader_id = t.get('UploaderID')
+ duration = int_or_none(t.get('DurationInSeconds')) or parse_duration(
+ t.get('Duration') or t.get('DurationStr'))
+ view_count = int_or_none(t.get('PlayCount') or t.get('PlayCount_hr'))
+
+ track = t.get('Name') or t.get('Name_Text_HTML')
+ artist = t.get('Author') or t.get('Author_Text_HTML')
+
+ if track:
+ title = '%s - %s' % (artist, track) if artist else track
+ else:
+ title = audio_id
+
+ return {
+ 'extractor_key': MailRuMusicIE.ie_key(),
+ 'id': audio_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'vcodec': 'none',
+ 'abr': int_or_none(t.get('BitRate')),
+ 'track': track,
+ 'artist': artist,
+ 'album': t.get('Album'),
+ 'url': audio_url,
+ }
+
+
+class MailRuMusicIE(MailRuMusicSearchBaseIE):
+ IE_NAME = 'mailru:music'
+ IE_DESC = 'Музыка@Mail.Ru'
+ _VALID_URL = r'https?://my\.mail\.ru/+music/+songs/+[^/?#&]+-(?P<id>[\da-f]+)'
+ _TESTS = [{
+ 'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893',
+ 'md5': '0f8c22ef8c5d665b13ac709e63025610',
+ 'info_dict': {
+ 'id': '4e31f7125d0dfaef505d947642366893',
+ 'ext': 'mp3',
+ 'title': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017 - М8Л8ТХ',
+ 'uploader': 'Игорь Мудрый',
+ 'uploader_id': '1459196328',
+ 'duration': 280,
+ 'view_count': int,
+ 'vcodec': 'none',
+ 'abr': 320,
+ 'track': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017',
+ 'artist': 'М8Л8ТХ',
+ },
+ }]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, audio_id)
+
+ title = self._og_search_title(webpage)
+ music_data = self._search(title, url, audio_id)['MusicData']
+ t = next(t for t in music_data if t.get('File') == audio_id)
+
+ info = self._extract_track(t)
+ info['title'] = title
+ return info
+
+
+class MailRuMusicSearchIE(MailRuMusicSearchBaseIE):
+ IE_NAME = 'mailru:music:search'
+ IE_DESC = 'Музыка@Mail.Ru'
+ _VALID_URL = r'https?://my\.mail\.ru/+music/+search/+(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://my.mail.ru/music/search/black%20shadow',
+ 'info_dict': {
+ 'id': 'black shadow',
+ },
+ 'playlist_mincount': 532,
+ }]
+
+ def _real_extract(self, url):
+ query = compat_urllib_parse_unquote(self._match_id(url))
+
+ entries = []
+
+ LIMIT = 100
+ offset = 0
+
+ for _ in itertools.count(1):
+ search = self._search(query, url, query, LIMIT, offset)
+
+ music_data = search.get('MusicData')
+ if not music_data or not isinstance(music_data, list):
+ break
+
+ for t in music_data:
+ track = self._extract_track(t, fatal=False)
+ if track:
+ entries.append(track)
+
+ total = try_get(
+ search, lambda x: x['Results']['music']['Total'], int)
+
+ if total is not None:
+ if offset > total:
+ break
+
+ offset += LIMIT
+
+ return self.playlist_result(entries, query)
diff --git a/hypervideo_dl/extractor/malltv.py b/hypervideo_dl/extractor/malltv.py
new file mode 100644
index 0000000..fadfd93
--- /dev/null
+++ b/hypervideo_dl/extractor/malltv.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ dict_get,
+ float_or_none,
+ int_or_none,
+ merge_dicts,
+ parse_duration,
+ try_get,
+)
+
+
+class MallTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|sk)\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
+ 'md5': '1c4a37f080e1f3023103a7b43458e518',
+ 'info_dict': {
+ 'id': 't0zzt0',
+ 'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
+ 'ext': 'mp4',
+ 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
+ 'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35',
+ 'duration': 216,
+ 'timestamp': 1538870400,
+ 'upload_date': '20181007',
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://sk.mall.tv/gejmhaus/reklamacia-nehreje-vyrobnik-tepla-alebo-spekacka',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, display_id, headers=self.geo_verification_headers())
+
+ video = self._parse_json(self._search_regex(
+ r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
+ webpage, 'video object'), display_id)
+ video_source = video['VideoSource']
+ video_id = self._search_regex(
+ r'/([\da-z]+)/index\b', video_source, 'video id')
+
+ formats = self._extract_m3u8_formats(
+ video_source + '.m3u8', video_id, 'mp4', 'm3u8_native')
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for s in (video.get('Subtitles') or {}):
+ s_url = s.get('Url')
+ if not s_url:
+ continue
+ subtitles.setdefault(s.get('Language') or 'cz', []).append({
+ 'url': s_url,
+ })
+
+ entity_counts = video.get('EntityCounts') or {}
+
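+ # Counters are keyed by the pluralized entity name (Views, Likes, ...)
+ # and may be numeric or string-valued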
+ def get_count(k):
+ v = entity_counts.get(k + 's') or {}
+ return int_or_none(dict_get(v, ('Count', 'StrCount')))
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ return merge_dicts({
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': video.get('Title'),
+ 'description': clean_html(video.get('Description')),
+ 'thumbnail': video.get('ThumbnailUrl'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'duration': int_or_none(video.get('DurationSeconds')) or parse_duration(video.get('Duration')),
+ 'view_count': get_count('View'),
+ 'like_count': get_count('Like'),
+ 'dislike_count': get_count('Dislike'),
+ 'average_rating': float_or_none(try_get(video, lambda x: x['EntityRating']['AvarageRate'])),
+ 'comment_count': get_count('Comment'),
+ }, info)
diff --git a/hypervideo_dl/extractor/mangomolo.py b/hypervideo_dl/extractor/mangomolo.py
new file mode 100644
index 0000000..acee370
--- /dev/null
+++ b/hypervideo_dl/extractor/mangomolo.py
@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote,
+)
+from ..utils import int_or_none
+
+
+class MangomoloBaseIE(InfoExtractor):
+ _BASE_REGEX = r'https?://(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
+
+ def _get_real_id(self, page_id):
+ return page_id
+
+ def _real_extract(self, url):
+ page_id = self._get_real_id(self._match_id(url))
+ webpage = self._download_webpage(
+ 'https://player.mangomolo.com/v1/%s?%s' % (self._TYPE, url.split('?')[1]), page_id)
+ hidden_inputs = self._hidden_inputs(webpage)
+ m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native'
+
+ format_url = self._html_search_regex(
+ [
+ r'(?:file|src)\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
+ r'<a[^>]+href="(rtsp://[^"]+)"'
+ ], webpage, 'format url')
+ formats = self._extract_wowza_formats(
+ format_url, page_id, m3u8_entry_protocol, ['smil'])
+ self._sort_formats(formats)
+
+ return {
+ 'id': page_id,
+ 'title': self._live_title(page_id) if self._IS_LIVE else page_id,
+ 'uploader_id': hidden_inputs.get('userid'),
+ 'duration': int_or_none(hidden_inputs.get('duration')),
+ 'is_live': self._IS_LIVE,
+ 'formats': formats,
+ }
+
+
+class MangomoloVideoIE(MangomoloBaseIE):
+ _TYPE = 'video'
+ IE_NAME = 'mangomolo:' + _TYPE
+ _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'video\?.*?\bid=(?P<id>\d+)'
+ _IS_LIVE = False
+
+
+class MangomoloLiveIE(MangomoloBaseIE):
+ _TYPE = 'live'
+ IE_NAME = 'mangomolo:' + _TYPE
+    _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'(?:live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
+ _IS_LIVE = True
+
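+    # Live channel ids arrive percent-encoded and base64-encoded; decode to the raw id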
+ def _get_real_id(self, page_id):
+ return compat_b64decode(compat_urllib_parse_unquote(page_id)).decode()
diff --git a/hypervideo_dl/extractor/manyvids.py b/hypervideo_dl/extractor/manyvids.py
new file mode 100644
index 0000000..e8d7163
--- /dev/null
+++ b/hypervideo_dl/extractor/manyvids.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ str_to_int,
+ urlencode_postdata,
+)
+
+
+class ManyVidsIE(InfoExtractor):
+ _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
+ _TESTS = [{
+ # preview video
+ 'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
+ 'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
+ 'info_dict': {
+ 'id': '133957',
+ 'ext': 'mp4',
+ 'title': 'everthing about me (Preview)',
+ 'view_count': int,
+ 'like_count': int,
+ },
+ }, {
+ # full video
+ 'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
+ 'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
+ 'info_dict': {
+ 'id': '935718',
+ 'ext': 'mp4',
+ 'title': 'MY FACE REVEAL',
+ 'view_count': int,
+ 'like_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(
+            r'data-(?:video-filepath|meta-video)\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'video URL', group='url')
+
+ title = self._html_search_regex(
+ (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
+ r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
+ webpage, 'title', default=None) or self._html_search_meta(
+ 'twitter:title', webpage, 'title', fatal=True)
+
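+        # Only a preview may be available; make that visible in the title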
+ if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
+ title += ' (Preview)'
+
+ mv_token = self._search_regex(
+ r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+ 'mv token', default=None, group='value')
+
+ if mv_token:
+            # This request sets some cookies that appear to be needed for playback
+ self._download_webpage(
+ 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
+ video_id, fatal=False, data=urlencode_postdata({
+ 'mvtoken': mv_token,
+ 'vid': video_id,
+ }), headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest'
+ })
+
+ if determine_ext(video_url) == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ else:
+ formats = [{'url': video_url}]
+
+ like_count = int_or_none(self._search_regex(
+ r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
+ view_count = str_to_int(self._html_search_regex(
+ r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
+ 'view count', default=None))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/maoritv.py b/hypervideo_dl/extractor/maoritv.py
new file mode 100644
index 0000000..0d23fec
--- /dev/null
+++ b/hypervideo_dl/extractor/maoritv.py
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MaoriTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?maoritelevision\.com/shows/(?:[^/]+/)+(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://www.maoritelevision.com/shows/korero-mai/S01E054/korero-mai-series-1-episode-54',
+ 'md5': '5ade8ef53851b6a132c051b1cd858899',
+ 'info_dict': {
+ 'id': '4774724855001',
+ 'ext': 'mp4',
+ 'title': 'Kōrero Mai, Series 1 Episode 54',
+ 'upload_date': '20160226',
+ 'timestamp': 1456455018,
+ 'description': 'md5:59bde32fd066d637a1a55794c56d8dcb',
+ 'uploader_id': '1614493167001',
+ },
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
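+        # Pages embed a Brightcove player; delegate extraction to BrightcoveNew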
+ brightcove_id = self._search_regex(
+ r'data-main-video-id=["\'](\d+)', webpage, 'brightcove id')
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ 'BrightcoveNew', brightcove_id)
diff --git a/hypervideo_dl/extractor/markiza.py b/hypervideo_dl/extractor/markiza.py
new file mode 100644
index 0000000..def960a
--- /dev/null
+++ b/hypervideo_dl/extractor/markiza.py
@@ -0,0 +1,126 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ orderedSet,
+ parse_duration,
+ try_get,
+)
+
+
+class MarkizaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
+ _TESTS = [{
+ 'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
+ 'md5': 'ada4e9fad038abeed971843aa028c7b0',
+ 'info_dict': {
+ 'id': '139078',
+ 'ext': 'mp4',
+ 'title': 'Oteckovia 109',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2760,
+ },
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/televizne-noviny/televizne-noviny/85430_televizne-noviny',
+ 'info_dict': {
+ 'id': '85430',
+ 'title': 'Televízne noviny',
+ },
+ 'playlist_count': 23,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/84723',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/filmy/85190_kamenak',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/embed/85295',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data = self._download_json(
+ 'http://videoarchiv.markiza.sk/json/video_jwplayer7.json',
+ video_id, query={'id': video_id})
+
+ info = self._parse_jwplayer_data(data, m3u8_id='hls', mpd_id='dash')
+
+ if info.get('_type') == 'playlist':
+ info.update({
+ 'id': video_id,
+ 'title': try_get(
+ data, lambda x: x['details']['name'], compat_str),
+ })
+ else:
+ info['duration'] = parse_duration(
+ try_get(data, lambda x: x['details']['duration'], compat_str))
+ return info
+
+
+class MarkizaPageIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
+ _TESTS = [{
+ 'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
+ 'md5': 'ada4e9fad038abeed971843aa028c7b0',
+ 'info_dict': {
+ 'id': '139355',
+ 'ext': 'mp4',
+ 'title': 'Oteckovia 110',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2604,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://dajto.markiza.sk/filmy-a-serialy/1774695_frajeri-vo-vegas',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://superstar.markiza.sk/aktualne/1923870_to-je-ale-telo-spevacka-ukazala-sexy-postavicku-v-bikinach',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://hybsa.markiza.sk/aktualne/1923790_uzasna-atmosfera-na-hybsa-v-poprade-superstaristi-si-prve-koncerty-pred-davom-ludi-poriadne-uzili',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://doma.markiza.sk/filmy/1885250_moja-vysnivana-svadba',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
+ 'only_matching': True,
+ }]
+
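+    # Defer to MarkizaIE for direct videoarchiv.markiza.sk links; this extractor only scrapes article pages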
+ @classmethod
+ def suitable(cls, url):
+ return False if MarkizaIE.suitable(url) else super(MarkizaPageIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ # Downloading for some hosts (e.g. dajto, doma) fails with 500
+ # although everything seems to be OK, so considering 500
+ # status code to be expected.
+ url, playlist_id, expected_status=500)
+
+ entries = [
+ self.url_result('http://videoarchiv.markiza.sk/video/%s' % video_id)
+ for video_id in orderedSet(re.findall(
+ r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
+ webpage))]
+
+ return self.playlist_result(entries, playlist_id)
diff --git a/hypervideo_dl/extractor/massengeschmacktv.py b/hypervideo_dl/extractor/massengeschmacktv.py
new file mode 100644
index 0000000..cfcc6b2
--- /dev/null
+++ b/hypervideo_dl/extractor/massengeschmacktv.py
@@ -0,0 +1,78 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ parse_filesize,
+)
+
+
+class MassengeschmackTVIE(InfoExtractor):
+ IE_NAME = 'massengeschmack.tv'
+ _VALID_URL = r'https?://(?:www\.)?massengeschmack\.tv/play/(?P<id>[^?&#]+)'
+
+ _TEST = {
+ 'url': 'https://massengeschmack.tv/play/fktv202',
+ 'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
+ 'info_dict': {
+ 'id': 'fktv202',
+ 'ext': 'mp4',
+ 'title': 'Fernsehkritik-TV - Folge 202',
+ },
+ }
+
+ def _real_extract(self, url):
+ episode = self._match_id(url)
+
+ webpage = self._download_webpage(url, episode)
+ title = clean_html(self._html_search_regex(
+ '<h3>([^<]+)</h3>', webpage, 'title'))
+ thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
+ sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
+
+ formats = []
+ for source in sources:
+ furl = source.get('src')
+ if not furl:
+ continue
+ furl = self._proto_relative_url(furl)
+ ext = determine_ext(furl) or mimetype2ext(source.get('type'))
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ furl, episode, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': furl,
+ 'format_id': determine_ext(furl),
+ })
+
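+        # The page also lists direct download links with resolution and file-size hints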
+ for (durl, format_id, width, height, filesize) in re.findall(r'''(?x)
+ <a[^>]+?href="(?P<url>(?:https:)?//[^"]+)".*?
+ <strong>(?P<format_id>.+?)</strong>.*?
+ <small>(?:(?P<width>\d+)x(?P<height>\d+))?\s+?\((?P<filesize>[\d,]+\s*[GM]iB)\)</small>
+ ''', webpage):
+ formats.append({
+ 'url': durl,
+ 'format_id': format_id,
+ 'width': int_or_none(width),
+ 'height': int_or_none(height),
+ 'filesize': parse_filesize(filesize),
+ 'vcodec': 'none' if format_id.startswith('Audio') else None,
+ })
+
+ self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr'))
+
+ return {
+ 'id': episode,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/matchtv.py b/hypervideo_dl/extractor/matchtv.py
new file mode 100644
index 0000000..bc9933a
--- /dev/null
+++ b/hypervideo_dl/extractor/matchtv.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+
+from .common import InfoExtractor
+from ..utils import xpath_text
+
+
+class MatchTVIE(InfoExtractor):
+ _VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)'
+ _TESTS = [{
+ 'url': 'http://matchtv.ru/#live-player',
+ 'info_dict': {
+ 'id': 'matchtv-live',
+ 'ext': 'flv',
+ 'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://matchtv.ru/on-air/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = 'matchtv-live'
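+        # The SMIL endpoint expects the fixed live-channel contentId and a random session id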
+ video_url = self._download_json(
+ 'http://player.matchtv.ntvplus.tv/player/smil', video_id,
+ query={
+ 'ts': '',
+ 'quality': 'SD',
+ 'contentId': '561d2c0df7159b37178b4567',
+ 'sign': '',
+ 'includeHighlights': '0',
+ 'userId': '',
+ 'sessionId': random.randint(1, 1000000000),
+ 'contentType': 'channel',
+ 'timeShift': '0',
+ 'platform': 'portal',
+ },
+ headers={
+ 'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
+ })['data']['videoUrl']
+ f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
+ formats = self._extract_f4m_formats(f4m_url, video_id)
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': self._live_title('Матч ТВ - Прямой эфир'),
+ 'is_live': True,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/mdr.py b/hypervideo_dl/extractor/mdr.py
new file mode 100644
index 0000000..dc6aa98
--- /dev/null
+++ b/hypervideo_dl/extractor/mdr.py
@@ -0,0 +1,196 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ url_or_none,
+ xpath_text,
+)
+
+
+class MDRIE(InfoExtractor):
+ IE_DESC = 'MDR.DE and KiKA'
+ _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
+
+ _GEO_COUNTRIES = ['DE']
+
+ _TESTS = [{
+ # MDR regularly deletes its videos
+ 'url': 'http://www.mdr.de/fakt/video189002.html',
+ 'only_matching': True,
+ }, {
+ # audio
+ 'url': 'http://www.mdr.de/kultur/audio1312272_zc-15948bad_zs-86171fdd.html',
+ 'md5': '64c4ee50f0a791deb9479cd7bbe9d2fa',
+ 'info_dict': {
+ 'id': '1312272',
+ 'ext': 'mp3',
+ 'title': 'Feuilleton vom 30. Oktober 2015',
+ 'duration': 250,
+ 'uploader': 'MITTELDEUTSCHER RUNDFUNK',
+ },
+ 'skip': '404 not found',
+ }, {
+ 'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
+ 'md5': '4930515e36b06c111213e80d1e4aad0e',
+ 'info_dict': {
+ 'id': '19636',
+ 'ext': 'mp4',
+ 'title': 'Baumhaus vom 30. Oktober 2015',
+ 'duration': 134,
+ 'uploader': 'KIKA',
+ },
+ 'skip': '404 not found',
+ }, {
+ 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
+ 'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
+ 'info_dict': {
+ 'id': '8182',
+ 'ext': 'mp4',
+ 'title': 'Beutolomäus und der geheime Weihnachtswunsch',
+ 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
+ 'timestamp': 1482541200,
+ 'upload_date': '20161224',
+ 'duration': 4628,
+ 'uploader': 'KIKA',
+ },
+ }, {
+ # audio with alternative playerURL pattern
+ 'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
+ 'info_dict': {
+ 'id': '100',
+ 'ext': 'mp4',
+ 'title': 'Feature: Operation Mindfuck - Robert Anton Wilson',
+ 'duration': 3239,
+ 'uploader': 'MITTELDEUTSCHER RUNDFUNK',
+ },
+ }, {
+ # empty bitrateVideo and bitrateAudio
+ 'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
+ 'info_dict': {
+ 'id': '128372',
+ 'ext': 'mp4',
+ 'title': 'Der kleine Wichtel kehrt zurück',
+ 'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
+ 'duration': 4876,
+ 'timestamp': 1607823300,
+ 'upload_date': '20201213',
+ 'uploader': 'ZDF',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ data_url = self._search_regex(
+ r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+?-avCustom\.xml)\1',
+ webpage, 'data url', group='url').replace(r'\/', '/')
+
+ doc = self._download_xml(
+ compat_urlparse.urljoin(url, data_url), video_id)
+
+ title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)
+
+ type_ = xpath_text(doc, './type', default=None)
+
+ formats = []
+ processed_urls = []
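+        # The same URL may appear under several delivery methods; process each only once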
+ for asset in doc.findall('./assets/asset'):
+ for source in (
+ 'download',
+ 'progressiveDownload',
+ 'dynamicHttpStreamingRedirector',
+ 'adaptiveHttpStreamingRedirector'):
+ url_el = asset.find('./%sUrl' % source)
+ if url_el is None:
+ continue
+
+ video_url = url_or_none(url_el.text)
+ if not video_url or video_url in processed_urls:
+ continue
+
+ processed_urls.append(video_url)
+
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ preference=0, m3u8_id='HLS', fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
+ preference=0, f4m_id='HDS', fatal=False))
+ else:
+ media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
+ vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
+ abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
+ filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))
+
+ format_id = [media_type]
+ if vbr or abr:
+ format_id.append(compat_str(vbr or abr))
+
+ f = {
+ 'url': video_url,
+ 'format_id': '-'.join(format_id),
+ 'filesize': filesize,
+ 'abr': abr,
+ 'vbr': vbr,
+ }
+
+ if vbr:
+ f.update({
+ 'width': int_or_none(xpath_text(asset, './frameWidth', 'width')),
+ 'height': int_or_none(xpath_text(asset, './frameHeight', 'height')),
+ })
+
+ if type_ == 'audio':
+ f['vcodec'] = 'none'
+
+ formats.append(f)
+
+ self._sort_formats(formats)
+
+ description = xpath_text(doc, './broadcast/broadcastDescription', 'description')
+ timestamp = parse_iso8601(
+ xpath_text(
+ doc, [
+ './broadcast/broadcastDate',
+ './broadcast/broadcastStartDate',
+ './broadcast/broadcastEndDate'],
+ 'timestamp', default=None))
+ duration = parse_duration(xpath_text(doc, './duration', 'duration'))
+ uploader = xpath_text(doc, './rights', 'uploader')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/medaltv.py b/hypervideo_dl/extractor/medaltv.py
new file mode 100644
index 0000000..67bb4de
--- /dev/null
+++ b/hypervideo_dl/extractor/medaltv.py
@@ -0,0 +1,138 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+
+class MedalTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://medal.tv/clips/2mA60jWAGQCBH',
+ 'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
+ 'info_dict': {
+ 'id': '2mA60jWAGQCBH',
+ 'ext': 'mp4',
+ 'title': 'Quad Cold',
+ 'description': 'Medal,https://medal.tv/desktop/',
+ 'uploader': 'MowgliSB',
+ 'timestamp': 1603165266,
+ 'upload_date': '20201020',
+ 'uploader_id': '10619174',
+ }
+ }, {
+ 'url': 'https://medal.tv/clips/2um24TWdty0NA',
+ 'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
+ 'info_dict': {
+ 'id': '2um24TWdty0NA',
+ 'ext': 'mp4',
+ 'title': 'u tk me i tk u bigger',
+ 'description': 'Medal,https://medal.tv/desktop/',
+ 'uploader': 'Mimicc',
+ 'timestamp': 1605580939,
+ 'upload_date': '20201117',
+ 'uploader_id': '5156321',
+ }
+ }, {
+ 'url': 'https://medal.tv/clips/37rMeFpryCC-9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://medal.tv/clips/2WRj40tpY_EU9',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ hydration_data = self._parse_json(self._search_regex(
+ r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
+ webpage, 'hydration data', default='{}'), video_id)
+
+ clip = try_get(
+ hydration_data, lambda x: x['clips'][video_id], dict) or {}
+ if not clip:
+ raise ExtractorError(
+ 'Could not find video information.', video_id=video_id)
+
+ title = clip['contentTitle']
+
+ source_width = int_or_none(clip.get('sourceWidth'))
+ source_height = int_or_none(clip.get('sourceHeight'))
+
+        aspect_ratio = (source_width / float(source_height)) if source_width and source_height else 16 / 9.0
+
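+        # Variant URLs only encode a height, so derive the width from the source aspect ratio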
+ def add_item(container, item_url, height, id_key='format_id', item_id=None):
+ item_id = item_id or '%dp' % height
+ if item_id not in item_url:
+ return
+ width = int(round(aspect_ratio * height))
+ container.append({
+ 'url': item_url,
+ id_key: item_id,
+ 'width': width,
+ 'height': height
+ })
+
+ formats = []
+ thumbnails = []
+ for k, v in clip.items():
+ if not (v and isinstance(v, compat_str)):
+ continue
+ mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
+ if not mobj:
+ continue
+ prefix = mobj.group(1)
+ height = int_or_none(mobj.group(2))
+ if prefix == 'contentUrl':
+ add_item(
+ formats, v, height or source_height,
+ item_id=None if height else 'source')
+ elif prefix == 'thumbnail':
+ add_item(thumbnails, v, height, 'id')
+
+ error = clip.get('error')
+ if not formats and error:
+ if error == 404:
+ raise ExtractorError(
+ 'That clip does not exist.',
+ expected=True, video_id=video_id)
+ else:
+ raise ExtractorError(
+ 'An unknown error occurred ({0}).'.format(error),
+ video_id=video_id)
+
+ self._sort_formats(formats)
+
+        # The author id is not known in advance; take the first profile from the
+        # hydration data. A missing profile is fine since these fields are optional.
+ author = try_get(
+ hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
+ author_id = str_or_none(author.get('id'))
+ author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': clip.get('contentDescription'),
+ 'uploader': author.get('displayName'),
+ 'timestamp': float_or_none(clip.get('created'), 1000),
+ 'uploader_id': author_id,
+ 'uploader_url': author_url,
+ 'duration': int_or_none(clip.get('videoLengthSeconds')),
+ 'view_count': int_or_none(clip.get('views')),
+ 'like_count': int_or_none(clip.get('likes')),
+ 'comment_count': int_or_none(clip.get('comments')),
+ }
diff --git a/hypervideo_dl/extractor/medialaan.py b/hypervideo_dl/extractor/medialaan.py
new file mode 100644
index 0000000..788acf7
--- /dev/null
+++ b/hypervideo_dl/extractor/medialaan.py
@@ -0,0 +1,115 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ mimetype2ext,
+ parse_iso8601,
+)
+
+
+class MedialaanIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+                        (?:embed\.)?mychannels\.video/embed/|
+ embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
+ (?:www\.)?(?:
+ (?:
+ 7sur7|
+ demorgen|
+ hln|
+ joe|
+ qmusic
+ )\.be|
+ (?:
+ [abe]d|
+ bndestem|
+ destentor|
+ gelderlander|
+ pzc|
+ tubantia|
+ volkskrant
+ )\.nl
+ )/video/(?:[^/]+/)*[^/?&#]+~p
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
+ 'info_dict': {
+ 'id': '193993',
+ 'ext': 'mp4',
+ 'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
+ 'timestamp': 1611663540,
+ 'upload_date': '20210126',
+ 'duration': 238,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.mychannels.video/script/production/193993',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.mychannels.video/production/193993',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://mychannels.video/embed/193993',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.mychannels.video/embed/193993',
+ 'only_matching': True,
+ }]
+
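+    # Host pages mark embeds with <div data-mychannels-type="video" data-mychannels-id="..."> containers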
+ @staticmethod
+ def _extract_urls(webpage):
+ entries = []
+ for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
+ mychannels_id = extract_attributes(element).get('data-mychannels-id')
+ if mychannels_id:
+ entries.append('https://mychannels.video/embed/' + mychannels_id)
+ return entries
+
+ def _real_extract(self, url):
+ production_id = self._match_id(url)
+ production = self._download_json(
+ 'https://embed.mychannels.video/sdk/production/' + production_id,
+ production_id, query={'options': 'UUUU_default'})['productions'][0]
+ title = production['title']
+
+ formats = []
+ for source in (production.get('sources') or []):
+ src = source.get('src')
+ if not src:
+ continue
+ ext = mimetype2ext(source.get('type'))
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, production_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'ext': ext,
+ 'url': src,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': production_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': production.get('posterUrl'),
+ 'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
+ 'duration': int_or_none(production.get('duration')) or None,
+ }
diff --git a/hypervideo_dl/extractor/mediaset.py b/hypervideo_dl/extractor/mediaset.py
new file mode 100644
index 0000000..2c16fc9
--- /dev/null
+++ b/hypervideo_dl/extractor/mediaset.py
@@ -0,0 +1,183 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .theplatform import ThePlatformBaseIE
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ update_url_query,
+)
+
+
+class MediasetIE(ThePlatformBaseIE):
+ _TP_TLD = 'eu'
+ _VALID_URL = r'''(?x)
+ (?:
+ mediaset:|
+ https?://
+ (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
+ (?:
+ (?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
+ player/index\.html\?.*?\bprogramGuid=
+ )
+ )(?P<id>[0-9A-Z]{16,})
+ '''
+ _TESTS = [{
+ # full episode
+ 'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824',
+ 'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
+ 'info_dict': {
+ 'id': 'FAFU000000661824',
+ 'ext': 'mp4',
+ 'title': 'Quarta puntata',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1414.26,
+ 'upload_date': '20161107',
+ 'series': 'Hello Goodbye',
+ 'timestamp': 1478532900,
+ 'uploader': 'Rete 4',
+ 'uploader_id': 'R4',
+ },
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
+ 'md5': '288532f0ad18307705b01e581304cd7b',
+ 'info_dict': {
+ 'id': 'F309013801000501',
+ 'ext': 'mp4',
+ 'title': 'Puntata del 25 maggio',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 6565.007,
+ 'upload_date': '20180526',
+ 'series': 'Matrix',
+ 'timestamp': 1527326245,
+ 'uploader': 'Canale 5',
+ 'uploader_id': 'C5',
+ },
+ }, {
+ # clip
+ 'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
+ 'only_matching': True,
+ }, {
+ # iframe simple
+ 'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
+ 'only_matching': True,
+ }, {
+ # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
+ 'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
+ 'only_matching': True,
+ }, {
+ 'url': 'mediaset:FAFU000000665924',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/mediasethaacuoreilfuturo/palmieri-alicudi-lisola-dei-tre-bambini-felici--un-decreto-per-alicudi-e-tutte-le-microscuole_FD00000000102295',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/cherryseason/anticipazioni-degli-episodi-del-23-ottobre_F306837101005C02',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/tg5/ambiente-onda-umana-per-salvare-il-pianeta_F309453601079D01',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(ie, webpage):
+ def _qs(url):
+ return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+
+ def _program_guid(qs):
+ return qs.get('programGuid', [None])[0]
+
+ entries = []
+ for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1',
+ webpage):
+ embed_url = mobj.group('url')
+ embed_qs = _qs(embed_url)
+ program_guid = _program_guid(embed_qs)
+ if program_guid:
+ entries.append(embed_url)
+ continue
+ video_id = embed_qs.get('id', [None])[0]
+ if not video_id:
+ continue
+ urlh = ie._request_webpage(
+ embed_url, video_id, note='Following embed URL redirect')
+ embed_url = urlh.geturl()
+ program_guid = _program_guid(_qs(embed_url))
+ if program_guid:
+ entries.append(embed_url)
+ return entries
+
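+    # Strip the stray "t" from vod05t manifest hosts and drop the query before standard SMIL parsing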
+ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
+ for video in smil.findall(self._xpath_ns('.//video', namespace)):
+            video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?\.mpd)\?.+', r'\1\2', video.attrib['src'])
+ return super(MediasetIE, self)._parse_smil_formats(smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
+
+ def _real_extract(self, url):
+ guid = self._match_id(url)
+ tp_path = 'PR1GhC/media/guid/2702976343/' + guid
+ info = self._extract_theplatform_metadata(tp_path, guid)
+
+ formats = []
+ subtitles = {}
+ first_e = None
+ for asset_type in ('SD', 'HD'):
+ # TODO: fixup ISM+none manifest URLs
+ for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'):
+ try:
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
+ 'mbr': 'true',
+ 'formats': f,
+ 'assetTypes': asset_type,
+ }), guid, 'Downloading %s %s SMIL data' % (f.split('+')[0], asset_type))
+ except ExtractorError as e:
+ if not first_e:
+ first_e = e
+ break
+ for tp_f in tp_formats:
+ tp_f['quality'] = 1 if asset_type == 'HD' else 0
+ formats.extend(tp_formats)
+ subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+ if first_e and not formats:
+ raise first_e
+ self._sort_formats(formats)
+
+ fields = []
+ for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))):
+ fields.extend(templ % repl for repl in repls)
+ feed_data = self._download_json(
+ 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid,
+ guid, fatal=False, query={'fields': ','.join(fields)})
+ if feed_data:
+ publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
+ info.update({
+ 'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')),
+ 'season_number': int_or_none(feed_data.get('tvSeasonNumber')),
+ 'series': feed_data.get('mediasetprogram$brandTitle'),
+ 'uploader': publish_info.get('description'),
+ 'uploader_id': publish_info.get('channel'),
+ 'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
+ })
+
+ info.update({
+ 'id': guid,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ return info
diff --git a/hypervideo_dl/extractor/mediasite.py b/hypervideo_dl/extractor/mediasite.py
new file mode 100644
index 0000000..d6eb157
--- /dev/null
+++ b/hypervideo_dl/extractor/mediasite.py
@@ -0,0 +1,367 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ mimetype2ext,
+ str_or_none,
+ try_get,
+ unescapeHTML,
+ unsmuggle_url,
+ url_or_none,
+ urljoin,
+)
+
+
+_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
+
+
+class MediasiteIE(InfoExtractor):
+ _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P<id>%s)(?P<query>\?[^#]+|)' % _ID_RE
+ _TESTS = [
+ {
+ 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
+ 'info_dict': {
+ 'id': '2db6c271681e4f199af3c60d1f82869b1d',
+ 'ext': 'mp4',
+ 'title': 'Lecture: Tuesday, September 20, 2016 - Sir Andrew Wiles',
+ 'description': 'Sir Andrew Wiles: “Equations in arithmetic”\\n\\nI will describe some of the interactions between modern number theory and the problem of solving equations in rational numbers or integers\\u0027.',
+ 'timestamp': 1474268400.0,
+ 'upload_date': '20160919',
+ },
+ },
+ {
+ 'url': 'http://mediasite.uib.no/Mediasite/Play/90bb363295d945d6b548c867d01181361d?catalog=a452b7df-9ae1-46b7-a3ba-aceeb285f3eb',
+ 'info_dict': {
+ 'id': '90bb363295d945d6b548c867d01181361d',
+ 'ext': 'mp4',
+ 'upload_date': '20150429',
+ 'title': '5) IT-forum 2015-Dag 1 - Dungbeetle - How and why Rain created a tiny bug tracker for Unity',
+ 'timestamp': 1430311380.0,
+ },
+ },
+ {
+ 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
+ 'md5': '481fda1c11f67588c0d9d8fbdced4e39',
+ 'info_dict': {
+ 'id': '585a43626e544bdd97aeb71a0ec907a01d',
+ 'ext': 'mp4',
+ 'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
+ 'duration': 7713.088,
+ 'timestamp': 1413309600,
+ 'upload_date': '20141014',
+ },
+ },
+ {
+ 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
+ 'md5': 'ef1fdded95bdf19b12c5999949419c92',
+ 'info_dict': {
+ 'id': '86a9ea9f53e149079fbdb4202b521ed21d',
+ 'ext': 'wmv',
+ 'title': '64ste Vakantiecursus: Afvalwater',
+ 'description': 'md5:7fd774865cc69d972f542b157c328305',
+ 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
+ 'duration': 10853,
+ 'timestamp': 1326446400,
+ 'upload_date': '20120113',
+ },
+ },
+ {
+ 'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
+ 'md5': '9422edc9b9a60151727e4b6d8bef393d',
+ 'info_dict': {
+ 'id': '24aace4429fc450fb5b38cdbf424a66e1d',
+ 'ext': 'mp4',
+ 'title': 'Xyce Software Training - Section 1',
+ 'description': r're:(?s)SAND Number: SAND 2013-7800.{200,}',
+ 'upload_date': '20120409',
+ 'timestamp': 1333983600,
+ 'duration': 7794,
+ }
+ },
+ {
+ 'url': 'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d',
+ 'only_matching': True,
+ },
+ {
+ # dashed id
+ 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271-681e-4f19-9af3-c60d1f82869b1d',
+ 'only_matching': True,
+ }
+ ]
+
+ # look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
+ _STREAM_TYPES = {
+ 0: 'video1', # the main video
+ 2: 'slide',
+ 3: 'presentation',
+ 4: 'video2', # screencast?
+ 5: 'video3',
+ }
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ unescapeHTML(mobj.group('url'))
+ for mobj in re.finditer(
+ r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE,
+ webpage)]
+
+ def _real_extract(self, url):
+ url, data = unsmuggle_url(url, {})
+ mobj = re.match(self._VALID_URL, url)
+ resource_id = mobj.group('id')
+ query = mobj.group('query')
+
+ webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer?
+ redirect_url = urlh.geturl()
+
+ # XXX: might have also extracted UrlReferrer and QueryString from the html
+ service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
+ r'<div[^>]+\bid=["\']ServicePath[^>]+>(.+?)</div>', webpage, resource_id,
+ default='/Mediasite/PlayerService/PlayerService.svc/json'))
+
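+        # Ask the PlayerService for presentation metadata, echoing the query string and referrer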
+ player_options = self._download_json(
+ '%s/GetPlayerOptions' % service_path, resource_id,
+ headers={
+ 'Content-type': 'application/json; charset=utf-8',
+ 'X-Requested-With': 'XMLHttpRequest',
+ },
+ data=json.dumps({
+ 'getPlayerOptionsRequest': {
+ 'ResourceId': resource_id,
+ 'QueryString': query,
+ 'UrlReferrer': data.get('UrlReferrer', ''),
+ 'UseScreenReader': False,
+ }
+ }).encode('utf-8'))['d']
+
+        presentation = player_options['Presentation']
+        if presentation is None:
+            raise ExtractorError(
+                'Mediasite says: %s' % player_options['PlayerPresentationStatusMessage'],
+                expected=True)
+
+        title = presentation['Title']
+
+ thumbnails = []
+ formats = []
+ for snum, Stream in enumerate(presentation['Streams']):
+ stream_type = Stream.get('StreamType')
+ if stream_type is None:
+ continue
+
+ video_urls = Stream.get('VideoUrls')
+ if not isinstance(video_urls, list):
+ video_urls = []
+
+ stream_id = self._STREAM_TYPES.get(
+ stream_type, 'type%u' % stream_type)
+
+ stream_formats = []
+ for unum, VideoUrl in enumerate(video_urls):
+ video_url = url_or_none(VideoUrl.get('Location'))
+ if not video_url:
+ continue
+ # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
+
+ media_type = VideoUrl.get('MediaType')
+ if media_type == 'SS':
+ stream_formats.extend(self._extract_ism_formats(
+ video_url, resource_id,
+ ism_id='%s-%u.%u' % (stream_id, snum, unum),
+ fatal=False))
+ elif media_type == 'Dash':
+ stream_formats.extend(self._extract_mpd_formats(
+ video_url, resource_id,
+ mpd_id='%s-%u.%u' % (stream_id, snum, unum),
+ fatal=False))
+ else:
+ stream_formats.append({
+ 'format_id': '%s-%u.%u' % (stream_id, snum, unum),
+ 'url': video_url,
+ 'ext': mimetype2ext(VideoUrl.get('MimeType')),
+ })
+
+ # TODO: if Stream['HasSlideContent']:
+ # synthesise an MJPEG video stream '%s-%u.slides' % (stream_type, snum)
+ # from Stream['Slides']
+ # this will require writing a custom downloader...
+
+ # disprefer 'secondary' streams
+ if stream_type != 0:
+ for fmt in stream_formats:
+ fmt['preference'] = -1
+
+ thumbnail_url = Stream.get('ThumbnailUrl')
+ if thumbnail_url:
+ thumbnails.append({
+ 'id': '%s-%u' % (stream_id, snum),
+ 'url': urljoin(redirect_url, thumbnail_url),
+ 'preference': -1 if stream_type != 0 else 0,
+ })
+ formats.extend(stream_formats)
+
+ self._sort_formats(formats)
+
+ # XXX: Presentation['Presenters']
+ # XXX: Presentation['Transcript']
+
+ return {
+ 'id': resource_id,
+ 'title': title,
+ 'description': presentation.get('Description'),
+ 'duration': float_or_none(presentation.get('Duration'), 1000),
+ 'timestamp': float_or_none(presentation.get('UnixTime'), 1000),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
+
+
+class MediasiteCatalogIE(InfoExtractor):
+ _VALID_URL = r'''(?xi)
+ (?P<url>https?://[^/]+/Mediasite)
+ /Catalog/Full/
+ (?P<catalog_id>{0})
+ (?:
+ /(?P<current_folder_id>{0})
+ /(?P<root_dynamic_folder_id>{0})
+ )?
+ '''.format(_ID_RE)
+ _TESTS = [{
+ 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48530d454381549f955d08c75e21',
+ 'info_dict': {
+ 'id': '631f9e48530d454381549f955d08c75e21',
+ 'title': 'WCET Summit: Adaptive Learning in Higher Ed: Improving Outcomes Dynamically',
+ },
+ 'playlist_count': 6,
+ 'expected_warnings': ['is not a supported codec'],
+ }, {
+ # with CurrentFolderId and RootDynamicFolderId
+ 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',
+ 'info_dict': {
+ 'id': '9518c4a6c5cf4993b21cbd53e828a92521',
+ 'title': 'IUSM Family and Friends Sessions',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://uipsyc.mediasite.com/mediasite/Catalog/Full/d5d79287c75243c58c50fef50174ec1b21',
+ 'only_matching': True,
+ }, {
+ # no AntiForgeryToken
+ 'url': 'https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',
+ 'only_matching': True,
+ }, {
+ # dashed id
+ 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48-530d-4543-8154-9f955d08c75e',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ mediasite_url = mobj.group('url')
+ catalog_id = mobj.group('catalog_id')
+ current_folder_id = mobj.group('current_folder_id') or catalog_id
+ root_dynamic_folder_id = mobj.group('root_dynamic_folder_id')
+
+ webpage = self._download_webpage(url, catalog_id)
+
+ # AntiForgeryToken is optional (e.g. [1])
+ # 1. https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21
+ anti_forgery_token = self._search_regex(
+ r'AntiForgeryToken\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'anti forgery token', default=None, group='value')
+ if anti_forgery_token:
+ anti_forgery_header = self._search_regex(
+ r'AntiForgeryHeaderName\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'anti forgery header name',
+ default='X-SOFO-AntiForgeryHeader', group='value')
+
+ data = {
+ 'IsViewPage': True,
+ 'IsNewFolder': True,
+ 'AuthTicket': None,
+ 'CatalogId': catalog_id,
+ 'CurrentFolderId': current_folder_id,
+ 'RootDynamicFolderId': root_dynamic_folder_id,
+ 'ItemsPerPage': 1000,
+ 'PageIndex': 0,
+ 'PermissionMask': 'Execute',
+ 'CatalogSearchType': 'SearchInFolder',
+ 'SortBy': 'Date',
+ 'SortDirection': 'Descending',
+ 'StartDate': None,
+ 'EndDate': None,
+ 'StatusFilterList': None,
+ 'PreviewKey': None,
+ 'Tags': [],
+ }
+
+ headers = {
+ 'Content-Type': 'application/json; charset=UTF-8',
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ }
+ if anti_forgery_token:
+ headers[anti_forgery_header] = anti_forgery_token
+
+ catalog = self._download_json(
+ '%s/Catalog/Data/GetPresentationsForFolder' % mediasite_url,
+ catalog_id, data=json.dumps(data).encode(), headers=headers)
+
+ entries = []
+ for video in catalog['PresentationDetailsList']:
+ if not isinstance(video, dict):
+ continue
+ video_id = str_or_none(video.get('Id'))
+ if not video_id:
+ continue
+ entries.append(self.url_result(
+ '%s/Play/%s' % (mediasite_url, video_id),
+ ie=MediasiteIE.ie_key(), video_id=video_id))
+
+ title = try_get(
+ catalog, lambda x: x['CurrentFolder']['Name'], compat_str)
+
+        return self.playlist_result(entries, catalog_id, title)
+
+
+class MediasiteNamedCatalogIE(InfoExtractor):
+ _VALID_URL = r'(?xi)(?P<url>https?://[^/]+/Mediasite)/Catalog/catalogs/(?P<catalog_name>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://msite.misis.ru/Mediasite/Catalog/catalogs/2016-industrial-management-skriabin-o-o',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ mediasite_url = mobj.group('url')
+ catalog_name = mobj.group('catalog_name')
+
+ webpage = self._download_webpage(url, catalog_name)
+
+ catalog_id = self._search_regex(
+ r'CatalogId\s*:\s*["\'](%s)' % _ID_RE, webpage, 'catalog id')
+
+ return self.url_result(
+ '%s/Catalog/Full/%s' % (mediasite_url, catalog_id),
+ ie=MediasiteCatalogIE.ie_key(), video_id=catalog_id)
diff --git a/hypervideo_dl/extractor/medici.py b/hypervideo_dl/extractor/medici.py
new file mode 100644
index 0000000..cd91023
--- /dev/null
+++ b/hypervideo_dl/extractor/medici.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ unified_strdate,
+ update_url_query,
+ urlencode_postdata,
+)
+
+
+class MediciIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?medici\.tv/#!/(?P<id>[^?#&]+)'
+ _TEST = {
+ 'url': 'http://www.medici.tv/#!/daniel-harding-frans-helmerson-verbier-festival-music-camp',
+ 'md5': '004c21bb0a57248085b6ff3fec72719d',
+ 'info_dict': {
+ 'id': '3059',
+ 'ext': 'flv',
+ 'title': 'Daniel Harding conducts the Verbier Festival Music Camp \u2013 With Frans Helmerson',
+ 'description': 'md5:322a1e952bafb725174fd8c1a8212f58',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20170408',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ # Sets csrftoken cookie
+ self._download_webpage(url, video_id)
+
+ MEDICI_URL = 'http://www.medici.tv/'
+
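+        # With json=true the endpoint returns the page payload as JSON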
+ data = self._download_json(
+ MEDICI_URL, video_id,
+ data=urlencode_postdata({
+ 'json': 'true',
+ 'page': '/%s' % video_id,
+ 'timezone_offset': -420,
+ }), headers={
+ 'X-CSRFToken': self._get_cookies(url)['csrftoken'].value,
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Referer': MEDICI_URL,
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+
+ video = data['video']['videos']['video1']
+
+ title = video.get('nom') or data['title']
+
+ video_id = video.get('id') or video_id
+ formats = self._extract_f4m_formats(
+ update_url_query(video['url_akamai'], {
+ 'hdcore': '3.1.0',
+                'plugin': 'aasp-3.1.0.43.124',
+ }), video_id, f4m_id='hds')
+
+ description = data.get('meta_description')
+ thumbnail = video.get('url_thumbnail') or data.get('main_image')
+ upload_date = unified_strdate(data['video'].get('date'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/megaphone.py b/hypervideo_dl/extractor/megaphone.py
new file mode 100644
index 0000000..5bafa6c
--- /dev/null
+++ b/hypervideo_dl/extractor/megaphone.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class MegaphoneIE(InfoExtractor):
+ IE_NAME = 'megaphone.fm'
+ IE_DESC = 'megaphone.fm embedded players'
+ _VALID_URL = r'https://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)'
+ _TEST = {
+ 'url': 'https://player.megaphone.fm/GLT9749789991?"',
+ 'md5': '4816a0de523eb3e972dc0dda2c191f96',
+ 'info_dict': {
+ 'id': 'GLT9749789991',
+ 'ext': 'mp3',
+ 'title': '#97 What Kind Of Idiot Gets Phished?',
+ 'thumbnail': r're:^https://.*\.png.*$',
+ 'duration': 1776.26375,
+ 'author': 'Reply All',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_property('audio:title', webpage)
+ author = self._og_search_property('audio:artist', webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
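+        # Episode metadata is embedded as a JS object literal; js_to_json makes it parseable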
+ episode_json = self._search_regex(r'(?s)var\s+episode\s*=\s*(\{.+?\});', webpage, 'episode JSON')
+ episode_data = self._parse_json(episode_json, video_id, js_to_json)
+ video_url = self._proto_relative_url(episode_data['mediaUrl'], 'https:')
+
+ formats = [{
+ 'url': video_url,
+ }]
+
+ return {
+ 'id': video_id,
+ 'thumbnail': thumbnail,
+ 'title': title,
+ 'author': author,
+ 'duration': episode_data['duration'],
+ 'formats': formats,
+ }
+
+ @classmethod
+ def _extract_urls(cls, webpage):
+ return [m[0] for m in re.findall(
+ r'<iframe[^>]*?\ssrc=["\'](%s)' % cls._VALID_URL, webpage)]
diff --git a/hypervideo_dl/extractor/meipai.py b/hypervideo_dl/extractor/meipai.py
new file mode 100644
index 0000000..2445b8b
--- /dev/null
+++ b/hypervideo_dl/extractor/meipai.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ unified_timestamp,
+)
+
+
+class MeipaiIE(InfoExtractor):
+ IE_DESC = '美拍'
+ _VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)'
+ _TESTS = [{
+ # regular uploaded video
+ 'url': 'http://www.meipai.com/media/531697625',
+ 'md5': 'e3e9600f9e55a302daecc90825854b4f',
+ 'info_dict': {
+ 'id': '531697625',
+ 'ext': 'mp4',
+ 'title': '#葉子##阿桑##余姿昀##超級女聲#',
+ 'description': '#葉子##阿桑##余姿昀##超級女聲#',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 152,
+ 'timestamp': 1465492420,
+ 'upload_date': '20160609',
+ 'view_count': 35511,
+ 'creator': '她她-TATA',
+ 'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'],
+ }
+ }, {
+ # record of live streaming
+ 'url': 'http://www.meipai.com/media/585526361',
+ 'md5': 'ff7d6afdbc6143342408223d4f5fb99a',
+ 'info_dict': {
+ 'id': '585526361',
+ 'ext': 'mp4',
+ 'title': '姿昀和善願 練歌練琴啦😁😁😁',
+ 'description': '姿昀和善願 練歌練琴啦😁😁😁',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 5975,
+ 'timestamp': 1474311799,
+ 'upload_date': '20160919',
+ 'view_count': 1215,
+ 'creator': '她她-TATA',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._html_search_regex(
+ r'<title[^>]*>([^<]+)</title>', webpage, 'title')
+
+ formats = []
+
+ # recorded playback of live streaming
+ m3u8_url = self._html_search_regex(
+ r'file:\s*encodeURIComponent\((["\'])(?P<url>(?:(?!\1).)+)\1\)',
+ webpage, 'm3u8 url', group='url', default=None)
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ if not formats:
+ # regular uploaded video
+ video_url = self._search_regex(
+ r'data-video=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'video url',
+ group='url', default=None)
+ if video_url:
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http',
+ })
+
+ timestamp = unified_timestamp(self._og_search_property(
+ 'video:release_date', webpage, 'release date', fatal=False))
+
+ tags = self._og_search_property(
+ 'video:tag', webpage, 'tags', default='').split(',')
+
+ view_count = int_or_none(self._html_search_meta(
+ 'interactionCount', webpage, 'view count'))
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration'))
+ creator = self._og_search_property(
+ 'video:director', webpage, 'creator', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'creator': creator,
+ 'tags': tags,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/melonvod.py b/hypervideo_dl/extractor/melonvod.py
new file mode 100644
index 0000000..bd8cf13
--- /dev/null
+++ b/hypervideo_dl/extractor/melonvod.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ urljoin,
+)
+
+
+class MelonVODIE(InfoExtractor):
+ _VALID_URL = r'https?://vod\.melon\.com/video/detail2\.html?\?.*?mvId=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://vod.melon.com/video/detail2.htm?mvId=50158734',
+ 'info_dict': {
+ 'id': '50158734',
+ 'ext': 'mp4',
+ 'title': "Jessica 'Wonderland' MV Making Film",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'artist': 'Jessica (제시카)',
+ 'upload_date': '20161212',
+ 'duration': 203,
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ play_info = self._download_json(
+ 'http://vod.melon.com/video/playerInfo.json', video_id,
+ note='Downloading player info JSON', query={'mvId': video_id})
+
+ title = play_info['mvInfo']['MVTITLE']
+
+ info = self._download_json(
+ 'http://vod.melon.com/delivery/streamingInfo.json', video_id,
+ note='Downloading streaming info JSON',
+ query={
+ 'contsId': video_id,
+ 'contsType': 'VIDEO',
+ })
+
+ stream_info = info['streamingInfo']
+
+ formats = self._extract_m3u8_formats(
+ stream_info['encUrl'], video_id, 'mp4', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ artist_list = play_info.get('artistList')
+ artist = None
+ if isinstance(artist_list, list):
+ artist = ', '.join(
+ [a['ARTISTNAMEWEBLIST']
+ for a in artist_list if a.get('ARTISTNAMEWEBLIST')])
+
+ thumbnail = urljoin(info.get('staticDomain'), stream_info.get('imgPath'))
+
+ duration = int_or_none(stream_info.get('playTime'))
+ upload_date = stream_info.get('mvSvcOpenDt', '')[:8] or None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'artist': artist,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'formats': formats
+ }
diff --git a/hypervideo_dl/extractor/meta.py b/hypervideo_dl/extractor/meta.py
new file mode 100644
index 0000000..cdb46e1
--- /dev/null
+++ b/hypervideo_dl/extractor/meta.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .pladform import PladformIE
+from ..utils import (
+ unescapeHTML,
+ int_or_none,
+ ExtractorError,
+)
+
+
+class METAIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.meta\.ua/(?:iframe/)?(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://video.meta.ua/5502115.video',
+ 'md5': '71b6f3ee274bef16f1ab410f7f56b476',
+ 'info_dict': {
+ 'id': '5502115',
+ 'ext': 'mp4',
+ 'title': 'Sony Xperia Z camera test [HQ]',
+ 'description': 'Xperia Z shoots video in FullHD HDR.',
+ 'uploader_id': 'nomobile',
+ 'uploader': 'CHЁZA.TV',
+ 'upload_date': '20130211',
+ },
+ 'add_ie': ['Youtube'],
+ }, {
+ 'url': 'http://video.meta.ua/iframe/5502115',
+ 'only_matching': True,
+ }, {
+ # pladform embed
+ 'url': 'http://video.meta.ua/7121015.video',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ st_html5 = self._search_regex(
+ r"st_html5\s*=\s*'#([^']+)'", webpage, 'uppod html5 st', default=None)
+
+ if st_html5:
+ # uppod st decryption algorithm is reverse engineered from function un(s) at uppod.js
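+            # each 3-digit hex group encodes one character; rebuild it as an HTML numeric entity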
+ json_str = ''
+ for i in range(0, len(st_html5), 3):
+ json_str += '&#x0%s;' % st_html5[i:i + 3]
+ uppod_data = self._parse_json(unescapeHTML(json_str), video_id)
+ error = uppod_data.get('customnotfound')
+ if error:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ video_url = uppod_data['file']
+ info = {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': uppod_data.get('comment') or self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': uppod_data.get('poster') or self._og_search_thumbnail(webpage),
+ 'duration': int_or_none(self._og_search_property(
+ 'video:duration', webpage, default=None)),
+ }
+ if 'youtube.com/' in video_url:
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': 'Youtube',
+ })
+ return info
+
+ pladform_url = PladformIE._extract_url(webpage)
+ if pladform_url:
+ return self.url_result(pladform_url)
diff --git a/hypervideo_dl/extractor/metacafe.py b/hypervideo_dl/extractor/metacafe.py
new file mode 100644
index 0000000..9e92416
--- /dev/null
+++ b/hypervideo_dl/extractor/metacafe.py
@@ -0,0 +1,287 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ get_element_by_attribute,
+ mimetype2ext,
+)
+
+
+class MetacafeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/(?P<video_id>[^/]+)/(?P<display_id>[^/?#]+)'
+ _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
+ _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
+ IE_NAME = 'metacafe'
+ _TESTS = [
+ # Youtube video
+ {
+ 'add_ie': ['Youtube'],
+ 'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
+ 'info_dict': {
+ 'id': '_aUehQsCQtM',
+ 'ext': 'mp4',
+ 'upload_date': '20090102',
+ 'title': 'The Electric Company | "Short I" | PBS KIDS GO!',
+ 'description': 'md5:2439a8ef6d5a70e380c22f5ad323e5a8',
+ 'uploader': 'PBS',
+ 'uploader_id': 'PBS'
+ }
+ },
+ # Normal metacafe video
+ {
+ 'url': 'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
+ 'md5': '6e0bca200eaad2552e6915ed6fd4d9ad',
+ 'info_dict': {
+ 'id': '11121940',
+ 'ext': 'mp4',
+ 'title': 'News: Stuff You Won\'t Do with Your PlayStation 4',
+ 'uploader': 'ign',
+ 'description': 'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
+ },
+ 'skip': 'Page is temporarily unavailable.',
+ },
+ # metacafe video with family filter
+ {
+ 'url': 'http://www.metacafe.com/watch/2155630/adult_art_by_david_hart_156/',
+ 'md5': 'b06082c5079bbdcde677a6291fbdf376',
+ 'info_dict': {
+ 'id': '2155630',
+ 'ext': 'mp4',
+ 'title': 'Adult Art By David Hart 156',
+ 'uploader': '63346',
+ 'description': 'md5:9afac8fc885252201ad14563694040fc',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # AnyClip video
+ {
+ 'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/',
+ 'info_dict': {
+ 'id': 'an-dVVXnuY7Jh77J',
+ 'ext': 'mp4',
+ 'title': 'The Andromeda Strain (1971): Stop the Bomb Part 3',
+ 'uploader': 'AnyClip',
+ 'description': 'md5:cbef0460d31e3807f6feb4e7a5952e5b',
+ },
+ },
+ # age-restricted video
+ {
+ 'url': 'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
+ 'md5': '98dde7c1a35d02178e8ab7560fe8bd09',
+ 'info_dict': {
+ 'id': '5186653',
+ 'ext': 'mp4',
+ 'title': 'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
+ 'uploader': 'Dwayne Pipe',
+ 'description': 'md5:950bf4c581e2c059911fa3ffbe377e4b',
+ 'age_limit': 18,
+ },
+ },
+ # cbs video
+ {
+ 'url': 'http://www.metacafe.com/watch/cb-8VD4r_Zws8VP/open_this_is_face_the_nation_february_9/',
+ 'info_dict': {
+ 'id': '8VD4r_Zws8VP',
+ 'ext': 'flv',
+ 'title': 'Open: This is Face the Nation, February 9',
+ 'description': 'md5:8a9ceec26d1f7ed6eab610834cc1a476',
+ 'duration': 96,
+ 'uploader': 'CBSI-NEW',
+ 'upload_date': '20140209',
+ 'timestamp': 1391959800,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
+ # Movieclips.com video
+ {
+ 'url': 'http://www.metacafe.com/watch/mv-Wy7ZU/my_week_with_marilyn_do_you_love_me/',
+ 'info_dict': {
+ 'id': 'mv-Wy7ZU',
+ 'ext': 'mp4',
+ 'title': 'My Week with Marilyn - Do You Love Me?',
+ 'description': 'From the movie My Week with Marilyn - Colin (Eddie Redmayne) professes his love to Marilyn (Michelle Williams) and gets her to promise to return to set and finish the movie.',
+ 'uploader': 'movie_trailers',
+ 'duration': 176,
+ },
+ 'params': {
+ 'skip_download': 'requires rtmpdump',
+ }
+ }
+ ]
+
+ def report_disclaimer(self):
+ self.to_screen('Retrieving disclaimer')
+
+ def _real_extract(self, url):
+ # Extract id and simplified title from URL
+ video_id, display_id = re.match(self._VALID_URL, url).groups()
+
+ # the video may come from an external site
+ m_external = re.match(r'^(\w{2})-(.*)$', video_id)
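+ # e.g. video_id 'yt-_aUehQsCQtM' splits into prefix 'yt' and
+ # external id '_aUehQsCQtM'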
+ if m_external is not None:
+ prefix, ext_id = m_external.groups()
+ # Check if video comes from YouTube
+ if prefix == 'yt':
+ return self.url_result('http://www.youtube.com/watch?v=%s' % ext_id, 'Youtube')
+ # CBS videos use theplatform.com
+ if prefix == 'cb':
+ return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
+
+ headers = {
+ # Disable family filter
+ 'Cookie': 'user=%s; ' % compat_urllib_parse.quote(json.dumps({'ffilter': False}))
+ }
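+ # the cookie value is URL-encoded JSON, i.e. the header sent is
+ # Cookie: user=%7B%22ffilter%22%3A%20false%7D;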
+
+ # AnyClip videos require the flashversion cookie so that we get the link
+ # to the mp4 file
+ if video_id.startswith('an-'):
+ headers['Cookie'] += 'flashVersion=0; '
+
+ # Retrieve video webpage to extract further information
+ webpage = self._download_webpage(url, video_id, headers=headers)
+
+ error = get_element_by_attribute(
+ 'class', 'notfound-page-title', webpage)
+ if error:
+ raise ExtractorError(error, expected=True)
+
+ video_title = self._html_search_meta(
+ ['og:title', 'twitter:title'], webpage, 'title', default=None) or self._search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
+
+ # Extract URL, uploader and title from webpage
+ self.report_extraction(video_id)
+ video_url = None
+ mobj = re.search(r'(?m)&(?:media|video)URL=([^&]+)', webpage)
+ if mobj is not None:
+ mediaURL = compat_urllib_parse_unquote(mobj.group(1))
+ video_ext = determine_ext(mediaURL)
+
+ # Extract gdaKey if available
+ mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
+ if mobj is None:
+ video_url = mediaURL
+ else:
+ gdaKey = mobj.group(1)
+ video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
+ if video_url is None:
+ mobj = re.search(r'<video src="([^"]+)"', webpage)
+ if mobj:
+ video_url = mobj.group(1)
+ video_ext = 'mp4'
+ if video_url is None:
+ flashvars = self._search_regex(
+ r' name="flashvars" value="(.*?)"', webpage, 'flashvars',
+ default=None)
+ if flashvars:
+ vardict = compat_parse_qs(flashvars)
+ if 'mediaData' not in vardict:
+ raise ExtractorError('Unable to extract media URL')
+ mobj = re.search(
+ r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
+ if mobj is None:
+ raise ExtractorError('Unable to extract media URL')
+ mediaURL = mobj.group('mediaURL').replace('\\/', '/')
+ video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
+ video_ext = determine_ext(video_url)
+ if video_url is None:
+ player_url = self._search_regex(
+ r"swfobject\.embedSWF\('([^']+)'",
+ webpage, 'config URL', default=None)
+ if player_url:
+ config_url = self._search_regex(
+ r'config=(.+)$', player_url, 'config URL')
+ config_doc = self._download_xml(
+ config_url, video_id,
+ note='Downloading video config')
+ smil_url = config_doc.find('.//properties').attrib['smil_file']
+ smil_doc = self._download_xml(
+ smil_url, video_id,
+ note='Downloading SMIL document')
+ base_url = smil_doc.find('./head/meta').attrib['base']
+ video_url = []
+ for vn in smil_doc.findall('.//video'):
+ br = int(vn.attrib['system-bitrate'])
+ play_path = vn.attrib['src']
+ video_url.append({
+ 'format_id': 'smil-%d' % br,
+ 'url': base_url,
+ 'play_path': play_path,
+ 'page_url': url,
+ 'player_url': player_url,
+ 'ext': play_path.partition(':')[0],
+ })
+ if video_url is None:
+ flashvars = self._parse_json(self._search_regex(
+ r'flashvars\s*=\s*({.*});', webpage, 'flashvars',
+ default=None), video_id, fatal=False)
+ if flashvars:
+ video_url = []
+ for source in (flashvars.get('sources') or []):
+ source_url = source.get('src')
+ if not source_url:
+ continue
+ ext = mimetype2ext(source.get('type')) or determine_ext(source_url)
+ if ext == 'm3u8':
+ video_url.extend(self._extract_m3u8_formats(
+ source_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+ else:
+ video_url.append({
+ 'url': source_url,
+ 'ext': ext,
+ })
+
+ if video_url is None:
+ raise ExtractorError('Unsupported video type')
+
+ description = self._html_search_meta(
+ ['og:description', 'twitter:description', 'description'],
+ webpage, 'description', fatal=False)
+ thumbnail = self._html_search_meta(
+ ['og:image', 'twitter:image'], webpage, 'thumbnail', fatal=False)
+ video_uploader = self._html_search_regex(
+ r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
+ webpage, 'uploader nickname', fatal=False)
+ duration = int_or_none(
+ self._html_search_meta('video:duration', webpage, default=None))
+ age_limit = (
+ 18
+ if re.search(r'(?:"contentRating":|"rating",)"restricted"', webpage)
+ else 0)
+
+ if isinstance(video_url, list):
+ formats = video_url
+ else:
+ formats = [{
+ 'url': video_url,
+ 'ext': video_ext,
+ }]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'description': description,
+ 'uploader': video_uploader,
+ 'title': video_title,
+ 'thumbnail': thumbnail,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ 'duration': duration,
+ }
diff --git a/hypervideo_dl/extractor/metacritic.py b/hypervideo_dl/extractor/metacritic.py
new file mode 100644
index 0000000..7d468d7
--- /dev/null
+++ b/hypervideo_dl/extractor/metacritic.py
@@ -0,0 +1,65 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ fix_xml_ampersands,
+)
+
+
+class MetacriticIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?metacritic\.com/.+?/trailers/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
+ 'info_dict': {
+ 'id': '3698222',
+ 'ext': 'mp4',
+ 'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
+ 'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
+ 'duration': 221,
+ },
+ 'skip': 'Not providing trailers anymore',
+ }, {
+ 'url': 'http://www.metacritic.com/game/playstation-4/tales-from-the-borderlands-a-telltale-game-series/trailers/5740315',
+ 'info_dict': {
+ 'id': '5740315',
+ 'ext': 'mp4',
+ 'title': 'Tales from the Borderlands - Finale: The Vault of the Traveler',
+ 'description': 'In the final episode of the season, all hell breaks loose. Jack is now in control of Helios\' systems, and he\'s ready to reclaim his rightful place as king of Hyperion (with or without you).',
+ 'duration': 114,
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ webpage = self._download_webpage(url, video_id)
+ # The xml is not well formatted, there are raw '&'
+ info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
+ video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)
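+ # e.g. fix_xml_ampersands turns a raw 'Smoke & Mirrors' into
+ # 'Smoke &amp; Mirrors' so the XML parser accepts the document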
+
+ clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
+ formats = []
+ for videoFile in clip.findall('httpURI/videoFile'):
+ rate_str = videoFile.find('rate').text
+ video_url = videoFile.find('filePath').text
+ formats.append({
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'format_id': rate_str,
+ 'tbr': int(rate_str),
+ })
+ self._sort_formats(formats)
+
+ description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
+ webpage, 'description', flags=re.DOTALL)
+
+ return {
+ 'id': video_id,
+ 'title': clip.find('title').text,
+ 'formats': formats,
+ 'description': description,
+ 'duration': int(clip.find('duration').text),
+ }
diff --git a/hypervideo_dl/extractor/mgoon.py b/hypervideo_dl/extractor/mgoon.py
new file mode 100644
index 0000000..7bb4739
--- /dev/null
+++ b/hypervideo_dl/extractor/mgoon.py
@@ -0,0 +1,87 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ qualities,
+ unified_strdate,
+)
+
+
+class MgoonIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://(?:www\.)?
+ (?:(?:m\.)?mgoon\.com/(?:ch/(?:.+)/v|play/view)|
+ video\.mgoon\.com)/(?P<id>[0-9]+)'''
+ _API_URL = 'http://mpos.mgoon.com/player/video?id={0}'
+ _TESTS = [
+ {
+ 'url': 'http://m.mgoon.com/ch/hi6618/v/5582148',
+ 'md5': 'dd46bb66ab35cf6d51cc812fd82da79d',
+ 'info_dict': {
+ 'id': '5582148',
+ 'uploader_id': 'hi6618',
+ 'duration': 240.419,
+ 'upload_date': '20131220',
+ 'ext': 'mp4',
+ 'title': 'md5:543aa4c27a4931d371c3f433e8cebebc',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ },
+ {
+ 'url': 'http://www.mgoon.com/play/view/5582148',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://video.mgoon.com/5582148',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ data = self._download_json(self._API_URL.format(video_id), video_id)
+
+ if data.get('errorInfo', {}).get('code') != 'NONE':
+ raise ExtractorError('%s encountered an error: %s' % (
+ self.IE_NAME, data['errorInfo']['message']), expected=True)
+
+ v_info = data['videoInfo']
+ title = v_info.get('v_title')
+ thumbnail = v_info.get('v_thumbnail')
+ duration = v_info.get('v_duration')
+ upload_date = unified_strdate(v_info.get('v_reg_date'))
+ uploader_id = data.get('userInfo', {}).get('u_alias')
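+ # v_duration appears to be reported in milliseconds
+ # (the test above expects 240.419 seconds)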
+ if duration:
+ duration /= 1000.0
+
+ age_limit = None
+ if data.get('accessInfo', {}).get('code') == 'VIDEO_STATUS_ADULT':
+ age_limit = 18
+
+ formats = []
+ get_quality = qualities(['360p', '480p', '720p', '1080p'])
+ for fmt in data['videoFiles']:
+ formats.append({
+ 'format_id': fmt['label'],
+ 'quality': get_quality(fmt['label']),
+ 'url': fmt['url'],
+ 'ext': fmt['format'],
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'upload_date': upload_date,
+ 'uploader_id': uploader_id,
+ 'age_limit': age_limit,
+ }
diff --git a/hypervideo_dl/extractor/mgtv.py b/hypervideo_dl/extractor/mgtv.py
new file mode 100644
index 0000000..cab3aa0
--- /dev/null
+++ b/hypervideo_dl/extractor/mgtv.py
@@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import time
+import uuid
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class MGTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(?:v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
+ IE_DESC = '芒果TV'
+
+ _TESTS = [{
+ 'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
+ 'info_dict': {
+ 'id': '3116640',
+ 'ext': 'mp4',
+ 'title': '我是歌手 第四季',
+ 'description': '我是歌手第四季双年巅峰会',
+ 'duration': 7461,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.mgtv.com/b/301817/3826653.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://w.mgtv.com/b/301817/3826653.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
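+ # tk2 is the payload 'did=<uuid>|pno=1030|ver=0.3.0301|clit=<unix time>',
+ # urlsafe-base64-encoded and then reversed, e.g. (sketch):
+ # base64.urlsafe_b64encode(b'did=...|clit=1623280000')[::-1]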
+ tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1]
+ try:
+ api_data = self._download_json(
+ 'https://pcweb.api.mgtv.com/player/video', video_id, query={
+ 'tk2': tk2,
+ 'video_id': video_id,
+ }, headers=self.geo_verification_headers())['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read().decode(), None)
+ if error.get('code') == 40005:
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ raise ExtractorError(error['msg'], expected=True)
+ raise
+ info = api_data['info']
+ title = info['title'].strip()
+ stream_data = self._download_json(
+ 'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
+ 'pm2': api_data['atc']['pm2'],
+ 'tk2': tk2,
+ 'video_id': video_id,
+ }, headers=self.geo_verification_headers())['data']
+ stream_domain = stream_data['stream_domain'][0]
+
+ formats = []
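+ # each stream entry carries a relative path; fetching it from the
+ # stream domain returns a small JSON document whose 'info' field is
+ # the actual (m3u8) media URL used below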
+ for idx, stream in enumerate(stream_data['stream']):
+ stream_path = stream.get('url')
+ if not stream_path:
+ continue
+ format_data = self._download_json(
+ stream_domain + stream_path, video_id,
+ note='Downloading video info for format #%d' % idx)
+ format_url = format_data.get('info')
+ if not format_url:
+ continue
+ tbr = int_or_none(stream.get('filebitrate') or self._search_regex(
+ r'_(\d+)_mp4/', format_url, 'tbr', default=None))
+ formats.append({
+ 'format_id': compat_str(tbr or idx),
+ 'url': format_url,
+ 'ext': 'mp4',
+ 'tbr': tbr,
+ 'protocol': 'm3u8_native',
+ 'http_headers': {
+ 'Referer': url,
+ },
+ 'format_note': stream.get('name'),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': info.get('desc'),
+ 'duration': int_or_none(info.get('duration')),
+ 'thumbnail': info.get('thumb'),
+ }
diff --git a/hypervideo_dl/extractor/miaopai.py b/hypervideo_dl/extractor/miaopai.py
new file mode 100644
index 0000000..f9e35ac
--- /dev/null
+++ b/hypervideo_dl/extractor/miaopai.py
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MiaoPaiIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?miaopai\.com/show/(?P<id>[-A-Za-z0-9~_]+)'
+ _TEST = {
+ 'url': 'http://www.miaopai.com/show/n~0hO7sfV1nBEw4Y29-Hqg__.htm',
+ 'md5': '095ed3f1cd96b821add957bdc29f845b',
+ 'info_dict': {
+ 'id': 'n~0hO7sfV1nBEw4Y29-Hqg__',
+ 'ext': 'mp4',
+ 'title': '西游记音乐会的秒拍视频',
+ 'thumbnail': 're:^https?://.*/n~0hO7sfV1nBEw4Y29-Hqg___m.jpg',
+ }
+ }
+
+ _USER_AGENT_IPAD = 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, video_id, headers={'User-Agent': self._USER_AGENT_IPAD})
+
+ title = self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title')
+ thumbnail = self._html_search_regex(
+ r'<div[^>]+class=(?P<q1>[\'"]).*\bvideo_img\b.*(?P=q1)[^>]+data-url=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)',
+ webpage, 'thumbnail', fatal=False, group='url')
+ videos = self._parse_html5_media_entries(url, webpage, video_id)
+ info = videos[0]
+
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ })
+ return info
diff --git a/hypervideo_dl/extractor/microsoftvirtualacademy.py b/hypervideo_dl/extractor/microsoftvirtualacademy.py
new file mode 100644
index 0000000..8e0aee0
--- /dev/null
+++ b/hypervideo_dl/extractor/microsoftvirtualacademy.py
@@ -0,0 +1,195 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_xpath,
+)
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ smuggle_url,
+ unsmuggle_url,
+ xpath_text,
+)
+
+
+class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
+ def _extract_base_url(self, course_id, display_id):
+ return self._download_json(
+ 'https://api-mlxprod.microsoft.com/services/products/anonymous/%s' % course_id,
+ display_id, 'Downloading course base URL')
+
+ def _extract_chapter_and_title(self, title):
+ if not title:
+ return None, None
+ m = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
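+ # e.g. '02 | Course Introduction' -> (2, 'Course Introduction')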
+ return (int(m.group('chapter')), m.group('title')) if m else (None, title)
+
+
+class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
+ IE_NAME = 'mva'
+ IE_DESC = 'Microsoft Virtual Academy videos'
+ _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME
+
+ _TESTS = [{
+ 'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
+ 'md5': '7826c44fc31678b12ad8db11f6b5abb9',
+ 'info_dict': {
+ 'id': 'gfVXISmEB_6804984382',
+ 'ext': 'mp4',
+ 'title': 'Course Introduction',
+ 'formats': 'mincount:3',
+ 'subtitles': {
+ 'en': [{
+ 'ext': 'ttml',
+ }],
+ },
+ }
+ }, {
+ 'url': 'mva:11788:gfVXISmEB_6804984382',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ mobj = re.match(self._VALID_URL, url)
+ course_id = mobj.group('course_id')
+ video_id = mobj.group('id')
+
+ base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id)
+
+ settings = self._download_xml(
+ '%s/content/content_%s/videosettings.xml?v=1' % (base_url, video_id),
+ video_id, 'Downloading video settings XML')
+
+ _, title = self._extract_chapter_and_title(xpath_text(
+ settings, './/Title', 'title', fatal=True))
+
+ formats = []
+
+ for sources in settings.findall(compat_xpath('.//MediaSources')):
+ sources_type = sources.get('videoType')
+ for source in sources.findall(compat_xpath('./MediaSource')):
+ video_url = source.text
+ if not video_url or not video_url.startswith('http'):
+ continue
+ if sources_type == 'smoothstreaming':
+ formats.extend(self._extract_ism_formats(
+ video_url, video_id, 'mss', fatal=False))
+ continue
+ video_mode = source.get('videoMode')
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]$', video_mode or '', 'height', default=None))
+ codec = source.get('codec')
+ acodec, vcodec = [None] * 2
+ if codec:
+ codecs = codec.split(',')
+ if len(codecs) == 2:
+ acodec, vcodec = codecs
+ elif len(codecs) == 1:
+ vcodec = codecs[0]
+ formats.append({
+ 'url': video_url,
+ 'format_id': video_mode,
+ 'height': height,
+ 'acodec': acodec,
+ 'vcodec': vcodec,
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for source in settings.findall(compat_xpath('.//MarkerResourceSource')):
+ subtitle_url = source.text
+ if not subtitle_url:
+ continue
+ subtitles.setdefault('en', []).append({
+ 'url': '%s/%s' % (base_url, subtitle_url),
+ 'ext': source.get('type'),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'subtitles': subtitles,
+ 'formats': formats
+ }
+
+
+class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
+ IE_NAME = 'mva:course'
+ IE_DESC = 'Microsoft Virtual Academy courses'
+ _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME
+
+ _TESTS = [{
+ 'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
+ 'info_dict': {
+ 'id': '11788',
+ 'title': 'Microsoft Azure Fundamentals: Virtual Machines',
+ },
+ 'playlist_count': 36,
+ }, {
+ # with emphasized chapters
+ 'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335',
+ 'info_dict': {
+ 'id': '16335',
+ 'title': 'Developing Windows 10 Games with Construct 2',
+ },
+ 'playlist_count': 10,
+ }, {
+ 'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
+ 'only_matching': True,
+ }, {
+ 'url': 'mva:course:11788',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if MicrosoftVirtualAcademyIE.suitable(url) else super(
+ MicrosoftVirtualAcademyCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ course_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ base_url = self._extract_base_url(course_id, display_id)
+
+ manifest = self._download_json(
+ '%s/imsmanifestlite.json' % base_url,
+ display_id, 'Downloading course manifest JSON')['manifest']
+
+ organization = manifest['organizations']['organization'][0]
+
+ entries = []
+ for chapter in organization['item']:
+ chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title'))
+ chapter_id = chapter.get('@identifier')
+ for item in chapter.get('item', []):
+ item_id = item.get('@identifier')
+ if not item_id:
+ continue
+ metadata = item.get('resource', {}).get('metadata') or {}
+ if metadata.get('learningresourcetype') != 'Video':
+ continue
+ _, title = self._extract_chapter_and_title(item.get('title'))
+ duration = parse_duration(metadata.get('duration'))
+ description = metadata.get('description')
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': smuggle_url(
+ 'mva:%s:%s' % (course_id, item_id), {'base_url': base_url}),
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'chapter': chapter_title,
+ 'chapter_number': chapter_number,
+ 'chapter_id': chapter_id,
+ })
+
+ title = organization.get('title') or manifest.get('metadata', {}).get('title')
+
+ return self.playlist_result(entries, course_id, title)
diff --git a/hypervideo_dl/extractor/minds.py b/hypervideo_dl/extractor/minds.py
new file mode 100644
index 0000000..8e9f0f8
--- /dev/null
+++ b/hypervideo_dl/extractor/minds.py
@@ -0,0 +1,196 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ int_or_none,
+ str_or_none,
+ strip_or_none,
+)
+
+
+class MindsBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'
+
+ def _call_api(self, path, video_id, resource, query=None):
+ api_url = 'https://www.minds.com/api/' + path
+ token = self._get_cookies(api_url).get('XSRF-TOKEN')
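+ # the API expects the XSRF cookie value echoed back in the
+ # X-XSRF-TOKEN header; an empty token is sent when no cookie is set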
+ return self._download_json(
+ api_url, video_id, 'Downloading %s JSON metadata' % resource, headers={
+ 'Referer': 'https://www.minds.com/',
+ 'X-XSRF-TOKEN': token.value if token else '',
+ }, query=query)
+
+
+class MindsIE(MindsBaseIE):
+ IE_NAME = 'minds'
+ _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.minds.com/media/100000000000086822',
+ 'md5': '215a658184a419764852239d4970b045',
+ 'info_dict': {
+ 'id': '100000000000086822',
+ 'ext': 'mp4',
+ 'title': 'Minds intro sequence',
+ 'thumbnail': r're:https?://.+\.png',
+ 'uploader_id': 'ottman',
+ 'upload_date': '20130524',
+ 'timestamp': 1369404826,
+ 'uploader': 'Bill Ottman',
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'tags': ['animation'],
+ 'comment_count': int,
+ 'license': 'attribution-cc',
+ },
+ }, {
+ # entity.type == 'activity' and empty title
+ 'url': 'https://www.minds.com/newsfeed/798025111988506624',
+ 'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
+ 'info_dict': {
+ 'id': '798022190320226304',
+ 'ext': 'mp4',
+ 'title': '798022190320226304',
+ 'uploader': 'ColinFlaherty',
+ 'upload_date': '20180111',
+ 'timestamp': 1515639316,
+ 'uploader_id': 'ColinFlaherty',
+ },
+ }, {
+ 'url': 'https://www.minds.com/archive/view/715172106794442752',
+ 'only_matching': True,
+ }, {
+ # youtube perma_url
+ 'url': 'https://www.minds.com/newsfeed/1197131838022602752',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ entity_id = self._match_id(url)
+ entity = self._call_api(
+ 'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
+ if entity.get('type') == 'activity':
+ if entity.get('custom_type') == 'video':
+ video_id = entity['entity_guid']
+ else:
+ return self.url_result(entity['perma_url'])
+ else:
+ assert entity['subtype'] == 'video'
+ video_id = entity_id
+ # 1080p and webm formats available only on the sources array
+ video = self._call_api(
+ 'v2/media/video/' + video_id, video_id, 'video')
+
+ formats = []
+ for source in (video.get('sources') or []):
+ src = source.get('src')
+ if not src:
+ continue
+ formats.append({
+ 'format_id': source.get('label'),
+ 'height': int_or_none(source.get('size')),
+ 'url': src,
+ })
+ self._sort_formats(formats)
+
+ entity = video.get('entity') or entity
+ owner = entity.get('ownerObj') or {}
+ uploader_id = owner.get('username')
+
+ tags = entity.get('tags')
+ if tags and isinstance(tags, compat_str):
+ tags = [tags]
+
+ thumbnail = None
+ poster = video.get('poster') or entity.get('thumbnail_src')
+ if poster:
+ urlh = self._request_webpage(poster, video_id, fatal=False)
+ if urlh:
+ thumbnail = urlh.geturl()
+
+ return {
+ 'id': video_id,
+ 'title': entity.get('title') or video_id,
+ 'formats': formats,
+ 'description': clean_html(entity.get('description')) or None,
+ 'license': str_or_none(entity.get('license')),
+ 'timestamp': int_or_none(entity.get('time_created')),
+ 'uploader': strip_or_none(owner.get('name')),
+ 'uploader_id': uploader_id,
+ 'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None,
+ 'view_count': int_or_none(entity.get('play:count')),
+ 'like_count': int_or_none(entity.get('thumbs:up:count')),
+ 'dislike_count': int_or_none(entity.get('thumbs:down:count')),
+ 'tags': tags,
+ 'comment_count': int_or_none(entity.get('comments:count')),
+ 'thumbnail': thumbnail,
+ }
+
+
+class MindsFeedBaseIE(MindsBaseIE):
+ _PAGE_SIZE = 150
+
+ def _entries(self, feed_id):
+ query = {'limit': self._PAGE_SIZE, 'sync': 1}
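+ # cursor-based pagination: each response includes a 'load-next'
+ # timestamp that is fed back as 'from_timestamp' until a page
+ # comes back shorter than _PAGE_SIZE (or the cursor is empty)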
+ i = 1
+ while True:
+ data = self._call_api(
+ 'v2/feeds/container/%s/videos' % feed_id,
+ feed_id, 'page %s' % i, query)
+ entities = data.get('entities') or []
+ for entity in entities:
+ guid = entity.get('guid')
+ if not guid:
+ continue
+ yield self.url_result(
+ 'https://www.minds.com/newsfeed/' + guid,
+ MindsIE.ie_key(), guid)
+ query['from_timestamp'] = data.get('load-next')
+ if not (query['from_timestamp'] and len(entities) == self._PAGE_SIZE):
+ break
+ i += 1
+
+ def _real_extract(self, url):
+ feed_id = self._match_id(url)
+ feed = self._call_api(
+ 'v1/%s/%s' % (self._FEED_PATH, feed_id),
+ feed_id, self._FEED_TYPE)[self._FEED_TYPE]
+
+ return self.playlist_result(
+ self._entries(feed['guid']), feed_id,
+ strip_or_none(feed.get('name')),
+ feed.get('briefdescription'))
+
+
+class MindsChannelIE(MindsFeedBaseIE):
+ _FEED_TYPE = 'channel'
+ IE_NAME = 'minds:' + _FEED_TYPE
+ _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
+ _FEED_PATH = 'channel'
+ _TEST = {
+ 'url': 'https://www.minds.com/ottman',
+ 'info_dict': {
+ 'id': 'ottman',
+ 'title': 'Bill Ottman',
+ 'description': 'Co-creator & CEO @minds',
+ },
+ 'playlist_mincount': 54,
+ }
+
+
+class MindsGroupIE(MindsFeedBaseIE):
+ _FEED_TYPE = 'group'
+ IE_NAME = 'minds:' + _FEED_TYPE
+ _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
+ _FEED_PATH = 'groups/group'
+ _TEST = {
+ 'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
+ 'info_dict': {
+ 'id': '785582576369672204',
+ 'title': 'Cooking Videos',
+ },
+ 'playlist_mincount': 1,
+ }
diff --git a/hypervideo_dl/extractor/ministrygrid.py b/hypervideo_dl/extractor/ministrygrid.py
new file mode 100644
index 0000000..8ad9239
--- /dev/null
+++ b/hypervideo_dl/extractor/ministrygrid.py
@@ -0,0 +1,57 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ smuggle_url,
+)
+
+
+class MinistryGridIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ministrygrid\.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])'
+
+ _TEST = {
+ 'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers',
+ 'md5': '844be0d2a1340422759c2a9101bab017',
+ 'info_dict': {
+ 'id': '3453494717001',
+ 'ext': 'mp4',
+ 'title': 'The Gospel by Numbers',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20140410',
+ 'description': 'Coming soon from T4G 2014!',
+ 'uploader_id': '2034960640001',
+ 'timestamp': 1397145591,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['TDSLifeway'],
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ portlets = self._parse_json(self._search_regex(
+ r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list'),
+ video_id)
+ pl_id = self._search_regex(
+ r'getPlid:function\(\){return"(\d+)"}', webpage, 'p_l_id')
+
+ for i, portlet in enumerate(portlets):
+ portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
+ portlet_code = self._download_webpage(
+ portlet_url, video_id,
+ note='Looking in portlet %s (%d/%d)' % (portlet, i + 1, len(portlets)),
+ fatal=False)
+ video_iframe_url = self._search_regex(
+ r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
+ default=None)
+ if video_iframe_url:
+ return self.url_result(
+ smuggle_url(video_iframe_url, {'force_videoid': video_id}),
+ video_id=video_id)
+
+ raise ExtractorError('Could not find video iframe in any portlets')
diff --git a/hypervideo_dl/extractor/minoto.py b/hypervideo_dl/extractor/minoto.py
new file mode 100644
index 0000000..6367311
--- /dev/null
+++ b/hypervideo_dl/extractor/minoto.py
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_codecs,
+)
+
+
+class MinotoIE(InfoExtractor):
+ _VALID_URL = r'(?:minoto:|https?://(?:play|iframe|embed)\.minoto-video\.com/(?P<player_id>[0-9]+)/)(?P<id>[a-zA-Z0-9]+)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ player_id = mobj.group('player_id') or '1'
+ video_id = mobj.group('id')
+ video_data = self._download_json('http://play.minoto-video.com/%s/%s.js' % (player_id, video_id), video_id)
+ video_metadata = video_data['video-metadata']
+ formats = []
+ for fmt in video_data['video-files']:
+ fmt_url = fmt.get('url')
+ if not fmt_url:
+ continue
+ container = fmt.get('container')
+ if container == 'hls':
+ formats.extend(self._extract_m3u8_formats(fmt_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ else:
+ fmt_profile = fmt.get('profile') or {}
+ formats.append({
+ 'format_id': fmt_profile.get('name-short'),
+ 'format_note': fmt_profile.get('name'),
+ 'url': fmt_url,
+ 'container': container,
+ 'tbr': int_or_none(fmt.get('bitrate')),
+ 'filesize': int_or_none(fmt.get('filesize')),
+ 'width': int_or_none(fmt.get('width')),
+ 'height': int_or_none(fmt.get('height')),
+ 'codecs': parse_codecs(fmt.get('codecs')),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_metadata['title'],
+ 'description': video_metadata.get('description'),
+ 'thumbnail': video_metadata.get('video-poster', {}).get('url'),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/miomio.py b/hypervideo_dl/extractor/miomio.py
new file mode 100644
index 0000000..40f72d6
--- /dev/null
+++ b/hypervideo_dl/extractor/miomio.py
@@ -0,0 +1,141 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ xpath_text,
+ int_or_none,
+ ExtractorError,
+ sanitized_Request,
+)
+
+
+class MioMioIE(InfoExtractor):
+ IE_NAME = 'miomio.tv'
+ _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
+ _TESTS = [{
+ # "type=video" in flashvars
+ 'url': 'http://www.miomio.tv/watch/cc88912/',
+ 'info_dict': {
+ 'id': '88912',
+ 'ext': 'flv',
+ 'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
+ 'duration': 5923,
+ },
+ 'skip': 'Unable to load videos',
+ }, {
+ 'url': 'http://www.miomio.tv/watch/cc184024/',
+ 'info_dict': {
+ 'id': '43729',
+ 'title': '《动漫同人插画绘制》',
+ },
+ 'playlist_mincount': 86,
+ 'skip': 'Unable to load videos',
+ }, {
+ 'url': 'http://www.miomio.tv/watch/cc173113/',
+ 'info_dict': {
+ 'id': '173113',
+ 'title': 'The New Macbook 2015 上手试玩与简评'
+ },
+ 'playlist_mincount': 2,
+ 'skip': 'Unable to load videos',
+ }, {
+ # new 'h5' player
+ 'url': 'http://www.miomio.tv/watch/cc273997/',
+ 'md5': '0b27a4b4495055d826813f8c3a6b2070',
+ 'info_dict': {
+ 'id': '273997',
+ 'ext': 'mp4',
+ 'title': 'マツコの知らない世界【劇的進化SP!ビニール傘&冷凍食品2016】 1_2 - 16 05 31',
+ },
+ 'skip': 'Unable to load videos',
+ }]
+
+ def _extract_mioplayer(self, webpage, video_id, title, http_headers):
+ xml_config = self._search_regex(
+ r'flashvars="type=(?:sina|video)&amp;(.+?)&amp;',
+ webpage, 'xml config')
+
+ # skipping the following page causes lags and eventually connection drop-outs
+ self._request_webpage(
+ 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (video_id, random.randint(100, 999)),
+ video_id)
+
+ vid_config_request = sanitized_Request(
+ 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
+ headers=http_headers)
+
+ # the following xml contains the actual configuration information on the video file(s)
+ vid_config = self._download_xml(vid_config_request, video_id)
+
+ if not int_or_none(xpath_text(vid_config, 'timelength')):
+ raise ExtractorError('Unable to load videos!', expected=True)
+
+ entries = []
+ for f in vid_config.findall('./durl'):
+ segment_url = xpath_text(f, 'url', 'video url')
+ if not segment_url:
+ continue
+ order = xpath_text(f, 'order', 'order')
+ segment_id = video_id
+ segment_title = title
+ if order:
+ segment_id += '-%s' % order
+ segment_title += ' part %s' % order
+ entries.append({
+ 'id': segment_id,
+ 'url': segment_url,
+ 'title': segment_title,
+ 'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000),
+ 'http_headers': http_headers,
+ })
+
+ return entries
+
+ def _download_chinese_webpage(self, *args, **kwargs):
+ # Requests with English locales return garbage
+ headers = {
+ 'Accept-Language': 'zh-TW,en-US;q=0.7,en;q=0.3',
+ }
+ kwargs.setdefault('headers', {}).update(headers)
+ return self._download_webpage(*args, **kwargs)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_chinese_webpage(
+ url, video_id)
+
+ title = self._html_search_meta(
+ 'description', webpage, 'title', fatal=True)
+
+ mioplayer_path = self._search_regex(
+ r'src="(/mioplayer(?:_h5)?/[^"]+)"', webpage, 'ref_path')
+
+ if '_h5' in mioplayer_path:
+ player_url = compat_urlparse.urljoin(url, mioplayer_path)
+ player_webpage = self._download_chinese_webpage(
+ player_url, video_id,
+ note='Downloading player webpage', headers={'Referer': url})
+ entries = self._parse_html5_media_entries(player_url, player_webpage, video_id)
+ http_headers = {'Referer': player_url}
+ else:
+ http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
+ entries = self._extract_mioplayer(webpage, video_id, title, http_headers)
+
+ if len(entries) == 1:
+ segment = entries[0]
+ segment['id'] = video_id
+ segment['title'] = title
+ segment['http_headers'] = http_headers
+ return segment
+
+ return {
+ '_type': 'multi_video',
+ 'id': video_id,
+ 'entries': entries,
+ 'title': title,
+ 'http_headers': http_headers,
+ }
diff --git a/hypervideo_dl/extractor/mit.py b/hypervideo_dl/extractor/mit.py
new file mode 100644
index 0000000..e1506a7
--- /dev/null
+++ b/hypervideo_dl/extractor/mit.py
@@ -0,0 +1,132 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ get_element_by_id,
+)
+
+
+class TechTVMITIE(InfoExtractor):
+ IE_NAME = 'techtv.mit.edu'
+ _VALID_URL = r'https?://techtv\.mit\.edu/(?:videos|embeds)/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
+ 'md5': '00a3a27ee20d44bcaa0933ccec4a2cf7',
+ 'info_dict': {
+ 'id': '25418',
+ 'ext': 'mp4',
+ 'title': 'MIT DNA and Protein Sets',
+ 'description': 'md5:46f5c69ce434f0a97e7c628cc142802d',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ raw_page = self._download_webpage(
+ 'http://techtv.mit.edu/videos/%s' % video_id, video_id)
+ clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
+
+ base_url = self._proto_relative_url(self._search_regex(
+ r'ipadUrl: \'(.+?cloudfront\.net/)', raw_page, 'base url'), 'http:')
+ formats_json = self._search_regex(
+ r'bitrates: (\[.+?\])', raw_page, 'video formats')
+ formats_mit = json.loads(formats_json)
+ formats = [
+ {
+ 'format_id': f['label'],
+ 'url': base_url + f['url'].partition(':')[2],
+ 'ext': f['url'].partition(':')[0],
+ 'format': f['label'],
+ 'width': f['width'],
+ 'vbr': f['bitrate'],
+ }
+ for f in formats_mit
+ ]
+
+ title = get_element_by_id('edit-title', clean_page)
+ description = clean_html(get_element_by_id('edit-description', clean_page))
+ thumbnail = self._search_regex(
+ r'playlist:.*?url: \'(.+?)\'',
+ raw_page, 'thumbnail', flags=re.DOTALL)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
+
+
+class OCWMITIE(InfoExtractor):
+ IE_NAME = 'ocw.mit.edu'
+ _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
+ _BASE_URL = 'http://ocw.mit.edu/'
+
+ _TESTS = [
+ {
+ 'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/',
+ 'info_dict': {
+ 'id': 'EObHWIEKGjA',
+ 'ext': 'webm',
+ 'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
+ 'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
+ 'upload_date': '20121109',
+ 'uploader_id': 'MIT',
+ 'uploader': 'MIT OpenCourseWare',
+ }
+ },
+ {
+ 'url': 'http://ocw.mit.edu/courses/mathematics/18-01sc-single-variable-calculus-fall-2010/1.-differentiation/part-a-definition-and-basic-rules/session-1-introduction-to-derivatives/',
+ 'info_dict': {
+ 'id': '7K1sB05pE0A',
+ 'ext': 'mp4',
+ 'title': 'Session 1: Introduction to Derivatives',
+ 'upload_date': '20090818',
+ 'uploader_id': 'MIT',
+ 'uploader': 'MIT OpenCourseWare',
+ 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ topic = mobj.group('topic')
+
+ webpage = self._download_webpage(url, topic)
+ title = self._html_search_meta('WT.cg_s', webpage)
+ description = self._html_search_meta('Description', webpage)
+
+ # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, start, stop, captions_file)
+ embed_chapter_media = re.search(r'ocw_embed_chapter_media\((.+?)\)', webpage)
+ if embed_chapter_media:
+ metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
+ metadata = re.split(r', ?', metadata)
+ yt = metadata[1]
+ else:
+ # search for call to ocw_embed_media(container_id, media_url, provider, page_url, image_url, captions_file)
+ embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
+ if embed_media:
+ metadata = re.sub(r'[\'"]', '', embed_media.group(1))
+ metadata = re.split(r', ?', metadata)
+ yt = metadata[1]
+ else:
+ raise ExtractorError('Unable to find embedded YouTube video.')
+ video_id = YoutubeIE.extract_id(yt)
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'url': yt,
+ 'ie_key': 'Youtube',
+ }
diff --git a/hypervideo_dl/extractor/mitele.py b/hypervideo_dl/extractor/mitele.py
new file mode 100644
index 0000000..b593723
--- /dev/null
+++ b/hypervideo_dl/extractor/mitele.py
@@ -0,0 +1,85 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .telecinco import TelecincoIE
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class MiTeleIE(TelecincoIE):
+ IE_DESC = 'mitele.es'
+ _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
+
+ _TESTS = [{
+ 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
+ 'info_dict': {
+ 'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg',
+ 'ext': 'mp4',
+ 'title': 'Diario de La redacción Programa 144',
+ 'description': 'md5:07c35a7b11abb05876a6a79185b58d27',
+ 'series': 'Diario de',
+ 'season': 'Season 14',
+ 'season_number': 14,
+ 'episode': 'Tor, la web invisible',
+ 'episode_number': 3,
+ 'thumbnail': r're:(?i)^https?://.*\.jpg$',
+ 'duration': 2913,
+ 'age_limit': 16,
+ 'timestamp': 1471209401,
+ 'upload_date': '20160814',
+ },
+ }, {
+ # no explicit title
+ 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
+ 'info_dict': {
+ 'id': 'oyNG1iNTE6TAPP-JmCjbwfwJqqMMX3Vq',
+ 'ext': 'mp4',
+ 'title': 'Cuarto Milenio Temporada 6 Programa 226',
+ 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
+ 'series': 'Cuarto Milenio',
+ 'season': 'Season 6',
+ 'season_number': 6,
+ 'episode': 'Episode 24',
+ 'episode_number': 24,
+ 'thumbnail': r're:(?i)^https?://.*\.jpg$',
+ 'duration': 7313,
+ 'age_limit': 12,
+ 'timestamp': 1471209021,
+ 'upload_date': '20160814',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144-40_1006364575251/player/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ pre_player = self._parse_json(self._search_regex(
+ r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
+ webpage, 'Pre Player'), display_id)['prePlayer']
+ title = pre_player['title']
+ video_info = self._parse_content(pre_player['video'], url)
+ content = pre_player.get('content') or {}
+ info = content.get('info') or {}
+
+ video_info.update({
+ 'title': title,
+ 'description': info.get('synopsis'),
+ 'series': content.get('title'),
+ 'season_number': int_or_none(info.get('season_number')),
+ 'episode': content.get('subtitle'),
+ 'episode_number': int_or_none(info.get('episode_number')),
+ 'duration': int_or_none(info.get('duration')),
+ 'age_limit': int_or_none(info.get('rating')),
+ 'timestamp': parse_iso8601(pre_player.get('publishedTime')),
+ })
+ return video_info
diff --git a/hypervideo_dl/extractor/mixcloud.py b/hypervideo_dl/extractor/mixcloud.py
new file mode 100644
index 0000000..6931985
--- /dev/null
+++ b/hypervideo_dl/extractor/mixcloud.py
@@ -0,0 +1,356 @@
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_chr,
+ compat_ord,
+ compat_str,
+ compat_urllib_parse_unquote,
+ compat_zip
+)
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+)
+
+
+class MixcloudBaseIE(InfoExtractor):
+ def _call_api(self, object_type, object_fields, display_id, username, slug=None):
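+ # builds a minimal GraphQL lookup, e.g. for ('cloudcast', 'name'):
+ # { cloudcastLookup(lookup: {username: "u", slug: "s"}) { name } }
+ # (illustrative username/slug; real queries pass larger field lists)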
+ lookup_key = object_type + 'Lookup'
+ return self._download_json(
+ 'https://www.mixcloud.com/graphql', display_id, query={
+ 'query': '''{
+ %s(lookup: {username: "%s"%s}) {
+ %s
+ }
+}''' % (lookup_key, username, ', slug: "%s"' % slug if slug else '', object_fields)
+ })['data'][lookup_key]
+
+
+class MixcloudIE(MixcloudBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
+ IE_NAME = 'mixcloud'
+
+ _TESTS = [{
+ 'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
+ 'info_dict': {
+ 'id': 'dholbach_cryptkeeper',
+ 'ext': 'm4a',
+ 'title': 'Cryptkeeper',
+ 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
+ 'uploader': 'Daniel Holbach',
+ 'uploader_id': 'dholbach',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'view_count': int,
+ 'timestamp': 1321359578,
+ 'upload_date': '20111115',
+ },
+ }, {
+ 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
+ 'info_dict': {
+ 'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
+ 'ext': 'mp3',
+ 'title': 'Caribou 7 inch Vinyl Mix & Chat',
+ 'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
+ 'uploader': 'Gilles Peterson Worldwide',
+ 'uploader_id': 'gillespeterson',
+ 'thumbnail': 're:https?://.*',
+ 'view_count': int,
+ 'timestamp': 1422987057,
+ 'upload_date': '20150203',
+ },
+ }, {
+ 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
+ 'only_matching': True,
+ }]
+ _DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'
+
+ @staticmethod
+ def _decrypt_xor_cipher(key, ciphertext):
+ """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
+ return ''.join([
+ compat_chr(compat_ord(ch) ^ compat_ord(k))
+ for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
+
+ def _real_extract(self, url):
+ username, slug = re.match(self._VALID_URL, url).groups()
+ username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
+ track_id = '%s_%s' % (username, slug)
+
+ cloudcast = self._call_api('cloudcast', '''audioLength
+ comments(first: 100) {
+ edges {
+ node {
+ comment
+ created
+ user {
+ displayName
+ username
+ }
+ }
+ }
+ totalCount
+ }
+ description
+ favorites {
+ totalCount
+ }
+ featuringArtistList
+ isExclusive
+ name
+ owner {
+ displayName
+ url
+ username
+ }
+ picture(width: 1024, height: 1024) {
+ url
+ }
+ plays
+ publishDate
+ reposts {
+ totalCount
+ }
+ streamInfo {
+ dashUrl
+ hlsUrl
+ url
+ }
+ tags {
+ tag {
+ name
+ }
+ }''', track_id, username, slug)
+
+ title = cloudcast['name']
+
+ stream_info = cloudcast['streamInfo']
+ formats = []
+
+ for url_key in ('url', 'hlsUrl', 'dashUrl'):
+ format_url = stream_info.get(url_key)
+ if not format_url:
+ continue
+ decrypted = self._decrypt_xor_cipher(
+ self._DECRYPTION_KEY, compat_b64decode(format_url))
+ if url_key == 'hlsUrl':
+ formats.extend(self._extract_m3u8_formats(
+ decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif url_key == 'dashUrl':
+ formats.extend(self._extract_mpd_formats(
+ decrypted, track_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'format_id': 'http',
+ 'url': decrypted,
+ 'downloader_options': {
+ # Mixcloud starts throttling at >~5M
+ 'http_chunk_size': 5242880,
+ },
+ })
+
+ if not formats and cloudcast.get('isExclusive'):
+ self.raise_login_required()
+
+ self._sort_formats(formats)
+
+ comments = []
+ for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
+ node = edge.get('node') or {}
+ text = strip_or_none(node.get('comment'))
+ if not text:
+ continue
+ user = node.get('user') or {}
+ comments.append({
+ 'author': user.get('displayName'),
+ 'author_id': user.get('username'),
+ 'text': text,
+ 'timestamp': parse_iso8601(node.get('created')),
+ })
+
+ tags = []
+ for t in (cloudcast.get('tags') or []):
+ tag = try_get(t, lambda x: x['tag']['name'], compat_str)
+ if tag:
+ tags.append(tag)
+
+ get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))
+
+ owner = cloudcast.get('owner') or {}
+
+ return {
+ 'id': track_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': cloudcast.get('description'),
+ 'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str),
+ 'uploader': owner.get('displayName'),
+ 'timestamp': parse_iso8601(cloudcast.get('publishDate')),
+ 'uploader_id': owner.get('username'),
+ 'uploader_url': owner.get('url'),
+ 'duration': int_or_none(cloudcast.get('audioLength')),
+ 'view_count': int_or_none(cloudcast.get('plays')),
+ 'like_count': get_count('favorites'),
+ 'repost_count': get_count('reposts'),
+ 'comment_count': get_count('comments'),
+ 'comments': comments,
+ 'tags': tags,
+ 'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None,
+ }
+
+
+class MixcloudPlaylistBaseIE(MixcloudBaseIE):
+ def _get_cloudcast(self, node):
+ return node
+
+ def _get_playlist_title(self, title, slug):
+ return title
+
+ def _real_extract(self, url):
+ username, slug = re.match(self._VALID_URL, url).groups()
+ username = compat_urllib_parse_unquote(username)
+ if not slug:
+ slug = 'uploads'
+ else:
+ slug = compat_urllib_parse_unquote(slug)
+ playlist_id = '%s_%s' % (username, slug)
+
+ is_playlist_type = self._ROOT_TYPE == 'playlist'
+ playlist_type = 'items' if is_playlist_type else slug
+ list_filter = ''
+
+ has_next_page = True
+ entries = []
+ while has_next_page:
+ playlist = self._call_api(
+ self._ROOT_TYPE, '''%s
+ %s
+ %s(first: 100%s) {
+ edges {
+ node {
+ %s
+ }
+ }
+ pageInfo {
+ endCursor
+ hasNextPage
+ }
+ }''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE),
+ playlist_id, username, slug if is_playlist_type else None)
+
+ items = playlist.get(playlist_type) or {}
+ for edge in items.get('edges', []):
+ cloudcast = self._get_cloudcast(edge.get('node') or {})
+ cloudcast_url = cloudcast.get('url')
+ if not cloudcast_url:
+ continue
+ slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
+ owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
+ video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None
+ entries.append(self.url_result(
+ cloudcast_url, MixcloudIE.ie_key(), video_id))
+
+ page_info = items['pageInfo']
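+ # advance the GraphQL cursor until hasNextPage is false; on later
+ # iterations list_filter becomes e.g. ', after: "<endCursor>"'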
+ has_next_page = page_info['hasNextPage']
+ list_filter = ', after: "%s"' % page_info['endCursor']
+
+ return self.playlist_result(
+ entries, playlist_id,
+ self._get_playlist_title(playlist[self._TITLE_KEY], slug),
+ playlist.get(self._DESCRIPTION_KEY))
+
+
+class MixcloudUserIE(MixcloudPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads|favorites|listens|stream)?/?$'
+ IE_NAME = 'mixcloud:user'
+
+ _TESTS = [{
+ 'url': 'http://www.mixcloud.com/dholbach/',
+ 'info_dict': {
+ 'id': 'dholbach_uploads',
+ 'title': 'Daniel Holbach (uploads)',
+ 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
+ },
+ 'playlist_mincount': 36,
+ }, {
+ 'url': 'http://www.mixcloud.com/dholbach/uploads/',
+ 'info_dict': {
+ 'id': 'dholbach_uploads',
+ 'title': 'Daniel Holbach (uploads)',
+ 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
+ },
+ 'playlist_mincount': 36,
+ }, {
+ 'url': 'http://www.mixcloud.com/dholbach/favorites/',
+ 'info_dict': {
+ 'id': 'dholbach_favorites',
+ 'title': 'Daniel Holbach (favorites)',
+ 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
+ },
+ # 'params': {
+ # 'playlist_items': '1-100',
+ # },
+ 'playlist_mincount': 396,
+ }, {
+ 'url': 'http://www.mixcloud.com/dholbach/listens/',
+ 'info_dict': {
+ 'id': 'dholbach_listens',
+ 'title': 'Daniel Holbach (listens)',
+ 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
+ },
+ # 'params': {
+ # 'playlist_items': '1-100',
+ # },
+ 'playlist_mincount': 1623,
+ 'skip': 'Large list',
+ }, {
+ 'url': 'https://www.mixcloud.com/FirstEar/stream/',
+ 'info_dict': {
+ 'id': 'FirstEar_stream',
+ 'title': 'First Ear (stream)',
+ 'description': 'Curators of good music\r\n\r\nfirstearmusic.com',
+ },
+ 'playlist_mincount': 271,
+ }]
+
+ _TITLE_KEY = 'displayName'
+ _DESCRIPTION_KEY = 'biog'
+ _ROOT_TYPE = 'user'
+ _NODE_TEMPLATE = '''slug
+ url
+ owner { username }'''
+
+ def _get_playlist_title(self, title, slug):
+ return '%s (%s)' % (title, slug)
+
+
+class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
+ IE_NAME = 'mixcloud:playlist'
+
+ _TESTS = [{
+ 'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
+ 'info_dict': {
+ 'id': 'maxvibes_jazzcat-on-ness-radio',
+ 'title': 'Ness Radio sessions',
+ },
+ 'playlist_mincount': 59,
+ }]
+ _TITLE_KEY = 'name'
+ _DESCRIPTION_KEY = 'description'
+ _ROOT_TYPE = 'playlist'
+ _NODE_TEMPLATE = '''cloudcast {
+ slug
+ url
+ owner { username }
+ }'''
+
+ def _get_cloudcast(self, node):
+ return node.get('cloudcast') or {}
diff --git a/hypervideo_dl/extractor/mlb.py b/hypervideo_dl/extractor/mlb.py
new file mode 100644
index 0000000..b69301d
--- /dev/null
+++ b/hypervideo_dl/extractor/mlb.py
@@ -0,0 +1,267 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ try_get,
+)
+
+
+class MLBBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ video = self._download_video_data(display_id)
+ video_id = video['id']
+ title = video['title']
+ feed = self._get_feed(video)
+
+ formats = []
+ for playback in (feed.get('playbacks') or []):
+ playback_url = playback.get('url')
+ if not playback_url:
+ continue
+ name = playback.get('name')
+ ext = determine_ext(playback_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ playback_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id=name, fatal=False))
+ else:
+ f = {
+ 'format_id': name,
+ 'url': playback_url,
+ }
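+ # e.g. a playback name like 'FLASH_1800K_1280X720' (hypothetical)
+ # yields tbr=1800, width=1280, height=720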
+ mobj = re.search(r'_(\d+)K_(\d+)X(\d+)', name or '')
+ if mobj:
+ f.update({
+ 'height': int(mobj.group(3)),
+ 'tbr': int(mobj.group(1)),
+ 'width': int(mobj.group(2)),
+ })
+ mobj = re.search(r'_(\d+)x(\d+)_(\d+)_(\d+)K\.mp4', playback_url)
+ if mobj:
+ f.update({
+ 'fps': int(mobj.group(3)),
+ 'height': int(mobj.group(2)),
+ 'tbr': int(mobj.group(4)),
+ 'width': int(mobj.group(1)),
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for cut in (try_get(feed, lambda x: x['image']['cuts'], list) or []):
+ src = cut.get('src')
+ if not src:
+ continue
+ thumbnails.append({
+ 'height': int_or_none(cut.get('height')),
+ 'url': src,
+ 'width': int_or_none(cut.get('width')),
+ })
+
+ language = (video.get('language') or 'EN').lower()
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': video.get('description'),
+ 'duration': parse_duration(feed.get('duration')),
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(video.get(self._TIMESTAMP_KEY)),
+ 'subtitles': self._extract_mlb_subtitles(feed, language),
+ }
+
+
+class MLBIE(MLBBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:[\da-z_-]+\.)*mlb\.com/
+ (?:
+ (?:
+ (?:[^/]+/)*video/[^/]+/c-|
+ (?:
+ shared/video/embed/(?:embed|m-internal-embed)\.html|
+ (?:[^/]+/)+(?:play|index)\.jsp|
+ )\?.*?\bcontent_id=
+ )
+ (?P<id>\d+)
+ )
+ '''
+ _TESTS = [
+ {
+ 'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
+ 'md5': '632358dacfceec06bad823b83d21df2d',
+ 'info_dict': {
+ 'id': '34698933',
+ 'ext': 'mp4',
+ 'title': "Ackley's spectacular catch",
+ 'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
+ 'duration': 66,
+ 'timestamp': 1405995000,
+ 'upload_date': '20140722',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'https://www.mlb.com/video/stanton-prepares-for-derby/c-34496663',
+ 'md5': 'bf2619bf9cacc0a564fc35e6aeb9219f',
+ 'info_dict': {
+ 'id': '34496663',
+ 'ext': 'mp4',
+ 'title': 'Stanton prepares for Derby',
+ 'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
+ 'duration': 46,
+ 'timestamp': 1405120200,
+ 'upload_date': '20140711',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'https://www.mlb.com/video/cespedes-repeats-as-derby-champ/c-34578115',
+ 'md5': '99bb9176531adc600b90880fb8be9328',
+ 'info_dict': {
+ 'id': '34578115',
+ 'ext': 'mp4',
+ 'title': 'Cespedes repeats as Derby champ',
+ 'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
+ 'duration': 488,
+ 'timestamp': 1405414336,
+ 'upload_date': '20140715',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'https://www.mlb.com/video/bautista-on-home-run-derby/c-34577915',
+ 'md5': 'da8b57a12b060e7663ee1eebd6f330ec',
+ 'info_dict': {
+ 'id': '34577915',
+ 'ext': 'mp4',
+ 'title': 'Bautista on Home Run Derby',
+ 'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
+ 'duration': 52,
+ 'timestamp': 1405405122,
+ 'upload_date': '20140715',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://mlb.mlb.com/shared/video/embed/embed.html?content_id=36599553',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.mlb.com/cardinals/video/piscottys-great-sliding-catch/c-51175783',
+ 'only_matching': True,
+ },
+ {
+ # From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer
+ 'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
+ 'only_matching': True,
+ },
+ ]
+ _TIMESTAMP_KEY = 'date'
+
+ @staticmethod
+ def _get_feed(video):
+ return video
+
+ @staticmethod
+ def _extract_mlb_subtitles(feed, language):
+ subtitles = {}
+ for keyword in (feed.get('keywordsAll') or []):
+ keyword_type = keyword.get('type')
+ if keyword_type and keyword_type.startswith('closed_captions_location_'):
+ cc_location = keyword.get('value')
+ if cc_location:
+ subtitles.setdefault(language, []).append({
+ 'url': cc_location,
+ })
+ return subtitles
+
+ def _download_video_data(self, display_id):
+ return self._download_json(
+ 'http://content.mlb.com/mlb/item/id/v1/%s/details/web-v1.json' % display_id,
+ display_id)
+
+
+class MLBVideoIE(MLBBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?mlb\.com/(?:[^/]+/)*video/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://www.mlb.com/mariners/video/ackley-s-spectacular-catch-c34698933',
+ 'md5': '632358dacfceec06bad823b83d21df2d',
+ 'info_dict': {
+ 'id': 'c04a8863-f569-42e6-9f87-992393657614',
+ 'ext': 'mp4',
+ 'title': "Ackley's spectacular catch",
+ 'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
+ 'duration': 66,
+ 'timestamp': 1405995000,
+ 'upload_date': '20140722',
+ 'thumbnail': r're:^https?://.+',
+ },
+ }
+ _TIMESTAMP_KEY = 'timestamp'
+
+ @classmethod
+ def suitable(cls, url):
+ return False if MLBIE.suitable(url) else super(MLBVideoIE, cls).suitable(url)
+
+ @staticmethod
+ def _get_feed(video):
+ return video['feeds'][0]
+
+ @staticmethod
+ def _extract_mlb_subtitles(feed, language):
+ subtitles = {}
+ for cc_location in (feed.get('closedCaptions') or []):
+ subtitles.setdefault(language, []).append({
+ 'url': cc_location,
+ })
+ return subtitles
+
+ def _download_video_data(self, display_id):
+ # https://www.mlb.com/data-service/en/videos/[SLUG]
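+ # the GraphQL gateway appears to accept the URL slug directly as the
+ # media id, so no separate slug-to-id lookup is needed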
+ return self._download_json(
+ 'https://fastball-gateway.mlb.com/graphql',
+ display_id, query={
+ 'query': '''{
+ mediaPlayback(ids: "%s") {
+ description
+ feeds(types: CMS) {
+ closedCaptions
+ duration
+ image {
+ cuts {
+ width
+ height
+ src
+ }
+ }
+ playbacks {
+ name
+ url
+ }
+ }
+ id
+ timestamp
+ title
+ }
+}''' % display_id,
+ })['data']['mediaPlayback'][0]
diff --git a/hypervideo_dl/extractor/mnet.py b/hypervideo_dl/extractor/mnet.py
new file mode 100644
index 0000000..0e26ca1
--- /dev/null
+++ b/hypervideo_dl/extractor/mnet.py
@@ -0,0 +1,89 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class MnetIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mnet\.(?:com|interest\.me)/tv/vod/(?:.*?\bclip_id=)?(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.mnet.com/tv/vod/171008',
+ 'info_dict': {
+ 'id': '171008',
+ 'title': 'SS_이해인@히든박스',
+ 'description': 'md5:b9efa592c3918b615ba69fe9f8a05c55',
+ 'duration': 88,
+ 'upload_date': '20151231',
+ 'timestamp': 1451564040,
+ 'age_limit': 0,
+ 'thumbnails': 'mincount:5',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'ext': 'flv',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://mnet.interest.me/tv/vod/172790',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mnet.com/tv/vod/vod_view.asp?clip_id=172790&tabMenu=',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ # TODO: extract rtmp formats
+ # no stype -> rtmp url
+ # stype=H -> m3u8 url
+ # stype=M -> mpd url
+ info = self._download_json(
+ 'http://content.api.mnet.com/player/vodConfig',
+ video_id, 'Downloading vod config JSON', query={
+ 'id': video_id,
+ 'ctype': 'CLIP',
+ 'stype': 'H',
+ })['data']['info']
+
+ title = info['title']
+
+ cdn_data = self._download_json(
+ info['cdn'], video_id, 'Downloading vod cdn JSON')['data'][0]
+ m3u8_url = cdn_data['url']
+ token = cdn_data.get('token')
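+ # the CDN token appears to be a ready-made query string; '-' means none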
+ if token and token != '-':
+ m3u8_url += '?' + token
+ formats = self._extract_wowza_formats(
+ m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp', 'f4m'])
+ self._sort_formats(formats)
+
+ description = info.get('ment')
+ duration = parse_duration(info.get('time'))
+ timestamp = parse_iso8601(info.get('date'), delimiter=' ')
+ age_limit = info.get('adult')
+ if age_limit is not None:
+ age_limit = 0 if age_limit == 'N' else 18
+ thumbnails = [{
+ 'id': thumb_format,
+ 'url': thumb['url'],
+ 'width': int_or_none(thumb.get('width')),
+ 'height': int_or_none(thumb.get('height')),
+ } for thumb_format, thumb in info.get('cover', {}).items() if thumb.get('url')]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'age_limit': age_limit,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/moevideo.py b/hypervideo_dl/extractor/moevideo.py
new file mode 100644
index 0000000..eb9b4ce
--- /dev/null
+++ b/hypervideo_dl/extractor/moevideo.py
@@ -0,0 +1,79 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+)
+
+
+class MoeVideoIE(InfoExtractor):
+ IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net'
+ _VALID_URL = r'''(?x)
+ https?://(?P<host>(?:www\.)?
+ (?:(?:moevideo|playreplay|videochart)\.net|thesame\.tv))/
+ (?:video|framevideo|embed)/(?P<id>[0-9a-z]+\.[0-9A-Za-z]+)'''
+ _API_URL = 'http://api.letitbit.net/'
+ _API_KEY = 'tVL0gjqo5'
+ _TESTS = [
+ {
+ 'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29',
+ 'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a',
+ 'info_dict': {
+ 'id': '00297.0036103fe3d513ef27915216fd29',
+ 'ext': 'flv',
+ 'title': 'Sink cut out machine',
+ 'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'width': 540,
+ 'height': 360,
+ 'duration': 179,
+ 'filesize': 17822500,
+ },
+ 'skip': 'Video has been removed',
+ },
+ {
+ 'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a',
+ 'md5': '74f0a014d5b661f0f0e2361300d1620e',
+ 'info_dict': {
+ 'id': '77107.7f325710a627383d40540d8e991a',
+ 'ext': 'flv',
+ 'title': 'Operacion Condor.',
+ 'description': 'md5:7e68cb2fcda66833d5081c542491a9a3',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'width': 480,
+ 'height': 296,
+ 'duration': 6027,
+ 'filesize': 588257923,
+ },
+ 'skip': 'Video has been removed',
+ },
+ ]
+
+ def _real_extract(self, url):
+ host, video_id = re.match(self._VALID_URL, url).groups()
+
+ webpage = self._download_webpage(
+ 'http://%s/video/%s' % (host, video_id),
+ video_id, 'Downloading webpage')
+
+ title = self._og_search_title(webpage)
+
+ embed_webpage = self._download_webpage(
+ 'http://%s/embed/%s' % (host, video_id),
+ video_id, 'Downloading embed webpage')
+ video = self._parse_json(self._search_regex(
+ r'mvplayer\("#player"\s*,\s*({.+})',
+ embed_webpage, 'mvplayer'), video_id)['video']
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': video.get('poster') or self._og_search_thumbnail(webpage),
+ 'description': clean_html(self._og_search_description(webpage)),
+ 'duration': int_or_none(self._og_search_property('video:duration', webpage)),
+ 'url': video['ourUrl'],
+ }
diff --git a/hypervideo_dl/extractor/mofosex.py b/hypervideo_dl/extractor/mofosex.py
new file mode 100644
index 0000000..5234cac
--- /dev/null
+++ b/hypervideo_dl/extractor/mofosex.py
@@ -0,0 +1,79 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ str_to_int,
+ unified_strdate,
+)
+from .keezmovies import KeezMoviesIE
+
+
+class MofosexIE(KeezMoviesIE):
+ _VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P<id>\d+)/(?P<display_id>[^/?#&.]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html',
+ 'md5': '558fcdafbb63a87c019218d6e49daf8a',
+ 'info_dict': {
+ 'id': '318131',
+ 'display_id': 'amateur-teen-playing-and-masturbating-318131',
+ 'ext': 'mp4',
+ 'title': 'amateur teen playing and masturbating',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20121114',
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ }
+ }, {
+ # This video is no longer available
+ 'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ webpage, info = self._extract_info(url)
+
+ view_count = str_to_int(self._search_regex(
+ r'VIEWS:</span>\s*([\d,.]+)', webpage, 'view count', fatal=False))
+ like_count = int_or_none(self._search_regex(
+ r'id=["\']amountLikes["\'][^>]*>(\d+)', webpage,
+ 'like count', fatal=False))
+ dislike_count = int_or_none(self._search_regex(
+ r'id=["\']amountDislikes["\'][^>]*>(\d+)', webpage,
+ 'dislike count', fatal=False))
+ upload_date = unified_strdate(self._html_search_regex(
+ r'Added:</span>([^<]+)', webpage, 'upload date', fatal=False))
+
+ info.update({
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'upload_date': upload_date,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ })
+
+ return info
+
+
+class MofosexEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.mofosex.com/embed/?videoid=318131&referrer=KM',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'http://www.mofosex.com/videos/{0}/{0}.html'.format(video_id),
+ ie=MofosexIE.ie_key(), video_id=video_id)
diff --git a/hypervideo_dl/extractor/mojvideo.py b/hypervideo_dl/extractor/mojvideo.py
new file mode 100644
index 0000000..165e658
--- /dev/null
+++ b/hypervideo_dl/extractor/mojvideo.py
@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ parse_duration,
+)
+
+
+class MojvideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mojvideo\.com/video-(?P<display_id>[^/]+)/(?P<id>[a-f0-9]+)'
+ _TEST = {
+ 'url': 'http://www.mojvideo.com/video-v-avtu-pred-mano-rdecelaska-alfi-nipic/3d1ed4497707730b2906',
+ 'md5': 'f7fd662cc8ce2be107b0d4f2c0483ae7',
+ 'info_dict': {
+ 'id': '3d1ed4497707730b2906',
+ 'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic',
+ 'ext': 'mp4',
+ 'title': 'V avtu pred mano rdečelaska - Alfi Nipič',
+ 'thumbnail': r're:^http://.*\.jpg$',
+ 'duration': 242,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ # XML is malformed
+ playerapi = self._download_webpage(
+ 'http://www.mojvideo.com/playerapi.php?v=%s&t=1' % video_id, display_id)
+
+ if '<error>true</error>' in playerapi:
+ error_desc = self._html_search_regex(
+ r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False)
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_desc), expected=True)
+
+ title = self._html_search_regex(
+ r'<title>([^<]+)</title>', playerapi, 'title')
+ video_url = self._html_search_regex(
+ r'<file>([^<]+)</file>', playerapi, 'video URL')
+ thumbnail = self._html_search_regex(
+ r'<preview>([^<]+)</preview>', playerapi, 'thumbnail', fatal=False)
+ duration = parse_duration(self._html_search_regex(
+ r'<duration>([^<]+)</duration>', playerapi, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ }
diff --git a/hypervideo_dl/extractor/morningstar.py b/hypervideo_dl/extractor/morningstar.py
new file mode 100644
index 0000000..0093bcd
--- /dev/null
+++ b/hypervideo_dl/extractor/morningstar.py
@@ -0,0 +1,50 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class MorningstarIE(InfoExtractor):
+ IE_DESC = 'morningstar.com'
+ _VALID_URL = r'https?://(?:(?:www|news)\.)morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
+ 'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
+ 'info_dict': {
+ 'id': '615869',
+ 'ext': 'mp4',
+ 'title': 'Get Ahead of the Curve on 2013 Taxes',
+ 'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
+ 'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
+ }
+ }, {
+ 'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_search_regex(
+ r'<h1 id="titleLink">(.*?)</h1>', webpage, 'title')
+ video_url = self._html_search_regex(
+ r'<input type="hidden" id="hidVideoUrl" value="([^"]+)"',
+ webpage, 'video URL')
+ thumbnail = self._html_search_regex(
+ r'<input type="hidden" id="hidSnapshot" value="([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
+ description = self._html_search_regex(
+ r'<div id="mstarDeck".*?>(.*?)</div>',
+ webpage, 'description', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ }
diff --git a/hypervideo_dl/extractor/motherless.py b/hypervideo_dl/extractor/motherless.py
new file mode 100644
index 0000000..ef1e081
--- /dev/null
+++ b/hypervideo_dl/extractor/motherless.py
@@ -0,0 +1,232 @@
+from __future__ import unicode_literals
+
+import datetime
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ ExtractorError,
+ InAdvancePagedList,
+ orderedSet,
+ str_to_int,
+ unified_strdate,
+)
+
+
+class MotherlessIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://motherless.com/AC3FFE1',
+ 'md5': '310f62e325a9fafe64f68c0bccb6e75f',
+ 'info_dict': {
+ 'id': 'AC3FFE1',
+ 'ext': 'mp4',
+ 'title': 'Fucked in the ass while playing PS3',
+ 'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
+ 'upload_date': '20100913',
+ 'uploader_id': 'famouslyfuckedup',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'http://motherless.com/532291B',
+ 'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
+ 'info_dict': {
+ 'id': '532291B',
+ 'ext': 'mp4',
+ 'title': 'Amazing girl playing the omegle game, PERFECT!',
+ 'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen',
+ 'game', 'hairy'],
+ 'upload_date': '20140622',
+ 'uploader_id': 'Sulivana7x',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ },
+ 'skip': '404',
+ }, {
+ 'url': 'http://motherless.com/g/cosplay/633979F',
+ 'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
+ 'info_dict': {
+ 'id': '633979F',
+ 'ext': 'mp4',
+ 'title': 'Turtlette',
+ 'categories': ['superheroine heroine superher'],
+ 'upload_date': '20140827',
+ 'uploader_id': 'shade0230',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ }
+ }, {
+ # no keywords
+ 'url': 'http://motherless.com/8B4BBC1',
+ 'only_matching': True,
+ }, {
+ # see https://motherless.com/videos/recent for recent videos with
+ # uploaded date in "ago" format
+ 'url': 'https://motherless.com/3C3E2CF',
+ 'info_dict': {
+ 'id': '3C3E2CF',
+ 'ext': 'mp4',
+ 'title': 'a/ Hot Teens',
+ 'categories': list,
+ 'upload_date': '20210104',
+ 'uploader_id': 'yonbiw',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ if any(p in webpage for p in (
+ '<title>404 - MOTHERLESS.COM<',
+ ">The page you're looking for cannot be found.<")):
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ if '>The content you are trying to view is for friends only.' in webpage:
+ raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
+
+ title = self._html_search_regex(
+ (r'(?s)<div[^>]+\bclass=["\']media-meta-title[^>]+>(.+?)</div>',
+ r'id="view-upload-title">\s+([^<]+)<'), webpage, 'title')
+ video_url = (self._html_search_regex(
+ (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+ r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
+ webpage, 'video URL', default=None, group='url')
+ or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
+ age_limit = self._rta_search(webpage)
+ view_count = str_to_int(self._html_search_regex(
+ (r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
+ webpage, 'view count', fatal=False))
+ like_count = str_to_int(self._html_search_regex(
+ (r'>([\d,.]+)\s+Favorites<',
+ r'<strong>Favorited</strong>\s+([^<]+)<'),
+ webpage, 'like count', fatal=False))
+
+ upload_date = unified_strdate(self._search_regex(
+ r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', webpage,
+ 'upload date', default=None))
+ if not upload_date:
+ uploaded_ago = self._search_regex(
+ r'>\s*(\d+[hd])\s+[aA]go\b', webpage, 'uploaded ago',
+ default=None)
+ if uploaded_ago:
+ delta = int(uploaded_ago[:-1])
+ _AGO_UNITS = {
+ 'h': 'hours',
+ 'd': 'days',
+ }
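+ # e.g. '12h' -> {'hours': 12} -> utcnow() - timedelta(hours=12)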
+ kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
+ upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
+
+ comment_count = webpage.count('class="media-comment-contents"')
+ uploader_id = self._html_search_regex(
+ r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
+ webpage, 'uploader_id')
+
+ categories = self._html_search_meta('keywords', webpage, default=None)
+ if categories:
+ categories = [cat.strip() for cat in categories.split(',')]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'upload_date': upload_date,
+ 'uploader_id': uploader_id,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'categories': categories,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'comment_count': comment_count,
+ 'age_limit': age_limit,
+ 'url': video_url,
+ }
+
+
+class MotherlessGroupIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
+ _TESTS = [{
+ 'url': 'http://motherless.com/g/movie_scenes',
+ 'info_dict': {
+ 'id': 'movie_scenes',
+ 'title': 'Movie Scenes',
+ 'description': 'Hot and sexy scenes from "regular" movies... '
+ 'Beautiful actresses fully nude... A looot of '
+ 'skin! :)Enjoy!',
+ },
+ 'playlist_mincount': 662,
+ }, {
+ 'url': 'http://motherless.com/gv/sex_must_be_funny',
+ 'info_dict': {
+ 'id': 'sex_must_be_funny',
+ 'title': 'Sex must be funny',
+ 'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
+ 'any kind!'
+ },
+ 'playlist_mincount': 9,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if MotherlessIE.suitable(url)
+ else super(MotherlessGroupIE, cls).suitable(url))
+
+ def _extract_entries(self, webpage, base):
+ entries = []
+ for mobj in re.finditer(
+ r'href="(?P<href>/[^"]+)"[^>]*>(?:\s*<img[^>]+alt="[^-]+-\s(?P<title>[^"]+)")?',
+ webpage):
+ video_url = compat_urlparse.urljoin(base, mobj.group('href'))
+ if not MotherlessIE.suitable(video_url):
+ continue
+ video_id = MotherlessIE._match_id(video_url)
+ title = mobj.group('title')
+ entries.append(self.url_result(
+ video_url, ie=MotherlessIE.ie_key(), video_id=video_id,
+ video_title=title))
+ # Alternative fallback
+ if not entries:
+ entries = [
+ self.url_result(
+ compat_urlparse.urljoin(base, '/' + entry_id),
+ ie=MotherlessIE.ie_key(), video_id=entry_id)
+ for entry_id in orderedSet(re.findall(
+ r'data-codename=["\']([A-Z0-9]+)', webpage))]
+ return entries
+
+ def _real_extract(self, url):
+ group_id = self._match_id(url)
+ page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id)
+ webpage = self._download_webpage(page_url, group_id)
+ title = self._search_regex(
+ r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
+ description = self._html_search_meta(
+ 'description', webpage, fatal=False)
+ page_count = self._int(self._search_regex(
+ r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
+ webpage, 'page_count'), 'page_count')
+ PAGE_SIZE = 80
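+ # each group page appears to list 80 entries; InAdvancePagedList then
+ # fetches pages lazily as the playlist is consumed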
+
+ def _get_page(idx):
+ webpage = self._download_webpage(
+ page_url, group_id, query={'page': idx + 1},
+ note='Downloading page %d/%d' % (idx + 1, page_count)
+ )
+ for entry in self._extract_entries(webpage, url):
+ yield entry
+
+ playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
+
+ return {
+ '_type': 'playlist',
+ 'id': group_id,
+ 'title': title,
+ 'description': description,
+ 'entries': playlist
+ }
diff --git a/hypervideo_dl/extractor/motorsport.py b/hypervideo_dl/extractor/motorsport.py
new file mode 100644
index 0000000..c9d1ab6
--- /dev/null
+++ b/hypervideo_dl/extractor/motorsport.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urlparse,
+)
+
+
+class MotorsportIE(InfoExtractor):
+ IE_DESC = 'motorsport.com'
+ _VALID_URL = r'https?://(?:www\.)?motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
+ _TEST = {
+ 'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
+ 'info_dict': {
+ 'id': '2-T3WuR-KMM',
+ 'ext': 'mp4',
+ 'title': 'Red Bull Racing: 2014 Rules Explained',
+ 'duration': 208,
+ 'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.',
+ 'uploader': 'mcomstaff',
+ 'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ',
+ 'upload_date': '20140903',
+ 'thumbnail': r're:^https?://.+\.jpg$'
+ },
+ 'add_ie': ['Youtube'],
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ iframe_path = self._html_search_regex(
+ r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage,
+ 'iframe path')
+ iframe = self._download_webpage(
+ compat_urlparse.urljoin(url, iframe_path), display_id,
+ 'Downloading iframe')
+ youtube_id = self._search_regex(
+ r'www\.youtube\.com/embed/(.{11})', iframe, 'youtube id')
+
+ return {
+ '_type': 'url_transparent',
+ 'display_id': display_id,
+ 'url': 'https://youtube.com/watch?v=%s' % youtube_id,
+ }
diff --git a/hypervideo_dl/extractor/movieclips.py b/hypervideo_dl/extractor/movieclips.py
new file mode 100644
index 0000000..5453da1
--- /dev/null
+++ b/hypervideo_dl/extractor/movieclips.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ smuggle_url,
+ float_or_none,
+ parse_iso8601,
+ update_url_query,
+)
+
+
+class MovieClipsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
+ _TEST = {
+ 'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597',
+ 'md5': '42b5a0352d4933a7bd54f2104f481244',
+ 'info_dict': {
+ 'id': 'pKIGmG83AqD9',
+ 'ext': 'mp4',
+ 'title': 'Warcraft Trailer 1',
+ 'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1446843055,
+ 'upload_date': '20151106',
+ 'uploader': 'Movieclips',
+ },
+ 'add_ie': ['ThePlatform'],
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ video = next(v for v in self._parse_json(self._search_regex(
+ r'var\s+__REACT_ENGINE__\s*=\s*({.+});',
+ webpage, 'react engine'), video_id)['playlist']['videos'] if v['id'] == video_id)
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'url': smuggle_url(update_url_query(
+ video['contentUrl'], {'mbr': 'true'}), {'force_smil_url': True}),
+ 'title': self._og_search_title(webpage),
+ 'description': self._html_search_meta('description', webpage),
+ 'duration': float_or_none(video.get('duration')),
+ 'timestamp': parse_iso8601(video.get('dateCreated')),
+ 'thumbnail': video.get('defaultImage'),
+ 'uploader': video.get('provider'),
+ }
diff --git a/hypervideo_dl/extractor/moviezine.py b/hypervideo_dl/extractor/moviezine.py
new file mode 100644
index 0000000..85cc6e2
--- /dev/null
+++ b/hypervideo_dl/extractor/moviezine.py
@@ -0,0 +1,45 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class MoviezineIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)'
+
+ _TEST = {
+ 'url': 'http://www.moviezine.se/video/205866',
+ 'info_dict': {
+ 'id': '205866',
+ 'ext': 'mp4',
+ 'title': 'Oculus - Trailer 1',
+ 'description': 'md5:40cc6790fc81d931850ca9249b40e8a4',
+ 'thumbnail': r're:http://.*\.jpg',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')
+
+ formats = [{
+ 'format_id': 'sd',
+ 'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'),
+ 'quality': 0,
+ 'ext': 'mp4',
+ }]
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._search_regex(r'title: "(.+?)",', jsplayer, 'title'),
+ 'thumbnail': self._search_regex(r'image: "(.+?)",', jsplayer, 'image'),
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ }
diff --git a/hypervideo_dl/extractor/movingimage.py b/hypervideo_dl/extractor/movingimage.py
new file mode 100644
index 0000000..4f62d62
--- /dev/null
+++ b/hypervideo_dl/extractor/movingimage.py
@@ -0,0 +1,52 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ unescapeHTML,
+ parse_duration,
+)
+
+
+class MovingImageIE(InfoExtractor):
+ _VALID_URL = r'https?://movingimage\.nls\.uk/film/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://movingimage.nls.uk/film/3561',
+ 'md5': '4caa05c2b38453e6f862197571a7be2f',
+ 'info_dict': {
+ 'id': '3561',
+ 'ext': 'mp4',
+ 'title': 'SHETLAND WOOL',
+ 'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
+ 'duration': 900,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ formats = self._extract_m3u8_formats(
+ self._html_search_regex(r'file\s*:\s*"([^"]+)"', webpage, 'm3u8 manifest URL'),
+ video_id, ext='mp4', entry_protocol='m3u8_native')
+
+ def search_field(field_name, fatal=False):
+ return self._search_regex(
+ r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
+ webpage, field_name, fatal=fatal)
+
+ title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
+ description = unescapeHTML(search_field('Description'))
+ duration = parse_duration(search_field('Running time'))
+ thumbnail = self._search_regex(
+ r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/msn.py b/hypervideo_dl/extractor/msn.py
new file mode 100644
index 0000000..e59b0b7
--- /dev/null
+++ b/hypervideo_dl/extractor/msn.py
@@ -0,0 +1,171 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ unescapeHTML,
+)
+
+
+class MSNIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|preview)\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'https://www.msn.com/en-in/money/video/7-ways-to-get-rid-of-chest-congestion/vi-BBPxU6d',
+ 'md5': '087548191d273c5c55d05028f8d2cbcd',
+ 'info_dict': {
+ 'id': 'BBPxU6d',
+ 'display_id': '7-ways-to-get-rid-of-chest-congestion',
+ 'ext': 'mp4',
+ 'title': 'Seven ways to get rid of chest congestion',
+ 'description': '7 Ways to Get Rid of Chest Congestion',
+ 'duration': 88,
+ 'uploader': 'Health',
+ 'uploader_id': 'BBPrMqa',
+ },
+ }, {
+ # Article, multiple Dailymotion Embeds
+ 'url': 'https://www.msn.com/en-in/money/sports/hottest-football-wags-greatest-footballers-turned-managers-and-more/ar-BBpc7Nl',
+ 'info_dict': {
+ 'id': 'BBpc7Nl',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ 'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.msn.com/en-ae/video/watch/obama-a-lot-of-people-will-be-disappointed/vi-AAhxUMH',
+ 'only_matching': True,
+ }, {
+ # geo restricted
+ 'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/the-first-fart-makes-you-laugh-the-last-fart-makes-you-cry/vp-AAhzIBU',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-‘raped-woman’-comment/vi-AAhvzW6',
+ 'only_matching': True,
+ }, {
+ # Vidible(AOL) Embed
+ 'url': 'https://www.msn.com/en-us/money/other/jupiter-is-about-to-come-so-close-you-can-see-its-moons-with-binoculars/vi-AACqsHR',
+ 'only_matching': True,
+ }, {
+ # Dailymotion Embed
+ 'url': 'https://www.msn.com/es-ve/entretenimiento/watch/winston-salem-paire-refait-des-siennes-en-perdant-sa-raquette-au-service/vp-AAG704L',
+ 'only_matching': True,
+ }, {
+ # YouTube Embed
+ 'url': 'https://www.msn.com/en-in/money/news/meet-vikram-%E2%80%94-chandrayaan-2s-lander/vi-AAGUr0v',
+ 'only_matching': True,
+ }, {
+ # NBCSports Embed
+ 'url': 'https://www.msn.com/en-us/money/football_nfl/week-13-preview-redskins-vs-panthers/vi-BBXsCDb',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id, page_id = re.match(self._VALID_URL, url).groups()
+
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
+ for _, metadata in re.findall(r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1', webpage):
+ video = self._parse_json(unescapeHTML(metadata), display_id)
+
+ provider_id = video.get('providerId')
+ player_name = video.get('playerName')
+ if player_name and provider_id:
+ entry = None
+ if player_name == 'AOL':
+ if provider_id.startswith('http'):
+ provider_id = self._search_regex(
+ r'https?://delivery\.vidible\.tv/video/redirect/([0-9a-f]{24})',
+ provider_id, 'vidible id')
+ entry = self.url_result(
+ 'aol-video:' + provider_id, 'Aol', provider_id)
+ elif player_name == 'Dailymotion':
+ entry = self.url_result(
+ 'https://www.dailymotion.com/video/' + provider_id,
+ 'Dailymotion', provider_id)
+ elif player_name == 'YouTube':
+ entry = self.url_result(
+ provider_id, 'Youtube', provider_id)
+ elif player_name == 'NBCSports':
+ entry = self.url_result(
+ 'http://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/' + provider_id,
+ 'NBCSportsVPlayer', provider_id)
+ if entry:
+ entries.append(entry)
+ continue
+
+ video_id = video['uuid']
+ title = video['title']
+
+ formats = []
+ for file_ in video.get('videoFiles', []):
+ format_url = file_.get('url')
+ if not format_url:
+ continue
+ if 'format=m3u8-aapl' in format_url:
+ # m3u8_native should not be used here until
+ # https://github.com/ytdl-org/youtube-dl/issues/9913 is fixed
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4',
+ m3u8_id='hls', fatal=False))
+ elif 'format=mpd-time-csf' in format_url:
+ formats.extend(self._extract_mpd_formats(
+ format_url, display_id, 'dash', fatal=False))
+ elif '.ism' in format_url:
+ if format_url.endswith('.ism'):
+ format_url += '/manifest'
+ formats.extend(self._extract_ism_formats(
+ format_url, display_id, 'mss', fatal=False))
+ else:
+ format_id = file_.get('formatCode')
+ formats.append({
+ 'url': format_url,
+ 'ext': 'mp4',
+ 'format_id': format_id,
+ 'width': int_or_none(file_.get('width')),
+ 'height': int_or_none(file_.get('height')),
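+ # the bitrate is often encoded in the file name, e.g. '..._1500.mp4'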
+ 'vbr': int_or_none(self._search_regex(r'_(\d+)\.mp4', format_url, 'vbr', default=None)),
+ 'preference': 1 if format_id == '1001' else None,
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for file_ in video.get('files', []):
+ format_url = file_.get('url')
+ format_code = file_.get('formatCode')
+ if not format_url or not format_code:
+ continue
+ if compat_str(format_code) == '3100':
+ subtitles.setdefault(file_.get('culture', 'en'), []).append({
+ 'ext': determine_ext(format_url, 'ttml'),
+ 'url': format_url,
+ })
+
+ entries.append({
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': video.get('headlineImage', {}).get('url'),
+ 'duration': int_or_none(video.get('durationSecs')),
+ 'uploader': video.get('sourceFriendly'),
+ 'uploader_id': video.get('providerId'),
+ 'creator': video.get('creator'),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ })
+
+ if not entries:
+ error = unescapeHTML(self._search_regex(
+ r'data-error=(["\'])(?P<error>.+?)\1',
+ webpage, 'error', group='error'))
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ return self.playlist_result(entries, page_id)
diff --git a/hypervideo_dl/extractor/mtv.py b/hypervideo_dl/extractor/mtv.py
new file mode 100644
index 0000000..5a5205c
--- /dev/null
+++ b/hypervideo_dl/extractor/mtv.py
@@ -0,0 +1,488 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_xpath,
+)
+from ..utils import (
+ ExtractorError,
+ find_xpath_attr,
+ fix_xml_ampersands,
+ float_or_none,
+ HEADRequest,
+ RegexNotFoundError,
+ sanitized_Request,
+ strip_or_none,
+ timeconvert,
+ try_get,
+ unescapeHTML,
+ update_url_query,
+ url_basename,
+ xpath_text,
+)
+
+
+def _media_xml_tag(tag):
+ return '{http://search.yahoo.com/mrss/}%s' % tag
+
+
+class MTVServicesInfoExtractor(InfoExtractor):
+ _MOBILE_TEMPLATE = None
+ _LANG = None
+
+ @staticmethod
+ def _id_from_uri(uri):
+ return uri.split(':')[-1]
+
+ @staticmethod
+ def _remove_template_parameter(url):
+ # Remove the templates, like &device={device}
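+ # e.g. '...?uri=x&device={device}&rest=y' -> '...?uri=x&rest=y'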
+ return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
+
+ def _get_feed_url(self, uri):
+ return self._FEED_URL
+
+ def _get_thumbnail_url(self, uri, itemdoc):
+ search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
+ thumb_node = itemdoc.find(search_path)
+ if thumb_node is None:
+ return None
+ return thumb_node.get('url') or thumb_node.text or None
+
+ def _extract_mobile_video_formats(self, mtvn_id):
+ webpage_url = self._MOBILE_TEMPLATE % mtvn_id
+ req = sanitized_Request(webpage_url)
+ # Otherwise we get a webpage that would execute some javascript
+ req.add_header('User-Agent', 'curl/7')
+ webpage = self._download_webpage(req, mtvn_id,
+ 'Downloading mobile page')
+ metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
+ req = HEADRequest(metrics_url)
+ response = self._request_webpage(req, mtvn_id, 'Resolving url')
+ url = response.geturl()
+ # Transform the url to get the best quality:
+ url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
+ return [{'url': url, 'ext': 'mp4'}]
+
+ def _extract_video_formats(self, mdoc, mtvn_id, video_id):
+ if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4|copyright_error\.flv(?:\?geo\b.+?)?)$', mdoc.find('.//src').text) is not None:
+ if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
+ self.to_screen('The normal version is not available from your '
+ 'country, trying with the mobile version')
+ return self._extract_mobile_video_formats(mtvn_id)
+ raise ExtractorError('This video is not available from your country.',
+ expected=True)
+
+ formats = []
+ for rendition in mdoc.findall('.//rendition'):
+ if rendition.get('method') == 'hls':
+ hls_url = rendition.find('./src').text
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ # fms
+ try:
+ _, _, ext = rendition.attrib['type'].partition('/')
+ rtmp_video_url = rendition.find('./src').text
+ if 'error_not_available.swf' in rtmp_video_url:
+ raise ExtractorError(
+ '%s said: video is not available' % self.IE_NAME,
+ expected=True)
+ if rtmp_video_url.endswith('siteunavail.png'):
+ continue
+ formats.extend([{
+ 'ext': 'flv' if rtmp_video_url.startswith('rtmp') else ext,
+ 'url': rtmp_video_url,
+ 'format_id': '-'.join(filter(None, [
+ 'rtmp' if rtmp_video_url.startswith('rtmp') else None,
+ rendition.get('bitrate')])),
+ 'width': int(rendition.get('width')),
+ 'height': int(rendition.get('height')),
+ }])
+ except (KeyError, TypeError):
+ raise ExtractorError('Invalid rendition field.')
+ if formats:
+ self._sort_formats(formats)
+ return formats
+
+ def _extract_subtitles(self, mdoc, mtvn_id):
+ subtitles = {}
+ for transcript in mdoc.findall('.//transcript'):
+ if transcript.get('kind') != 'captions':
+ continue
+ lang = transcript.get('srclang')
+ for typographic in transcript.findall('./typographic'):
+ sub_src = typographic.get('src')
+ if not sub_src:
+ continue
+ ext = typographic.get('format')
+ if ext == 'cea-608':
+ ext = 'scc'
+ subtitles.setdefault(lang, []).append({
+ 'url': compat_str(sub_src),
+ 'ext': ext
+ })
+ return subtitles
+
+ def _get_video_info(self, itemdoc, use_hls=True):
+ uri = itemdoc.find('guid').text
+ video_id = self._id_from_uri(uri)
+ self.report_extraction(video_id)
+ content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
+ mediagen_url = self._remove_template_parameter(content_el.attrib['url'])
+ mediagen_url = mediagen_url.replace('device={device}', '')
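+ # tell the mediagen endpoint which delivery method is acceptable
+ # (HLS or the legacy FMS/RTMP renditions)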
+ if 'acceptMethods' not in mediagen_url:
+ mediagen_url += '&' if '?' in mediagen_url else '?'
+ mediagen_url += 'acceptMethods='
+ mediagen_url += 'hls' if use_hls else 'fms'
+
+ mediagen_doc = self._download_xml(
+ mediagen_url, video_id, 'Downloading video urls', fatal=False)
+
+ if mediagen_doc is False:
+ return None
+
+ item = mediagen_doc.find('./video/item')
+ if item is not None and item.get('type') == 'text':
+ message = '%s returned error: ' % self.IE_NAME
+ if item.get('code') is not None:
+ message += '%s - ' % item.get('code')
+ message += item.text
+ raise ExtractorError(message, expected=True)
+
+ description = strip_or_none(xpath_text(itemdoc, 'description'))
+
+ timestamp = timeconvert(xpath_text(itemdoc, 'pubDate'))
+
+ title_el = find_xpath_attr(
+ itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:video_title')
+ if title_el is None:
+ title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title'))
+ if title_el is None:
+ title_el = itemdoc.find(compat_xpath('.//title'))
+
+ title = title_el.text if title_el is not None else None
+ if title is None:
+ raise ExtractorError('Could not find video title')
+ title = title.strip()
+
+ # This a short id that's used in the webpage urls
+ mtvn_id = None
+ mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:id')
+ if mtvn_id_node is not None:
+ mtvn_id = mtvn_id_node.text
+
+ formats = self._extract_video_formats(mediagen_doc, mtvn_id, video_id)
+
+ # Some parts of complete video may be missing (e.g. missing Act 3 in
+ # http://www.southpark.de/alle-episoden/s14e01-sexual-healing)
+ if not formats:
+ return None
+
+ self._sort_formats(formats)
+
+ return {
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': self._extract_subtitles(mediagen_doc, mtvn_id),
+ 'id': video_id,
+ 'thumbnail': self._get_thumbnail_url(uri, itemdoc),
+ 'description': description,
+ 'duration': float_or_none(content_el.attrib.get('duration')),
+ 'timestamp': timestamp,
+ }
+
+ def _get_feed_query(self, uri):
+ data = {'uri': uri}
+ if self._LANG:
+ data['lang'] = self._LANG
+ return data
+
+ def _get_videos_info(self, uri, use_hls=True):
+ video_id = self._id_from_uri(uri)
+ feed_url = self._get_feed_url(uri)
+ info_url = update_url_query(feed_url, self._get_feed_query(uri))
+ return self._get_videos_info_from_url(info_url, video_id, use_hls)
+
+ def _get_videos_info_from_url(self, url, video_id, use_hls=True):
+ idoc = self._download_xml(
+ url, video_id,
+ 'Downloading info', transform_source=fix_xml_ampersands)
+
+ title = xpath_text(idoc, './channel/title')
+ description = xpath_text(idoc, './channel/description')
+
+ entries = []
+ for item in idoc.findall('.//item'):
+ info = self._get_video_info(item, use_hls)
+ if info:
+ entries.append(info)
+
+ return self.playlist_result(
+ entries, playlist_title=title, playlist_description=description)
+
+ def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
+ triforce_feed = self._parse_json(self._search_regex(
+ r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage,
+ 'triforce feed', default='{}'), video_id, fatal=False)
+
+ data_zone = self._search_regex(
+ r'data-zone=(["\'])(?P<zone>.+?_lc_promo.*?)\1', webpage,
+ 'data zone', default=data_zone, group='zone')
+
+ feed_url = try_get(
+ triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'],
+ compat_str)
+ if not feed_url:
+ return
+
+ feed = self._download_json(feed_url, video_id, fatal=False)
+ if not feed:
+ return
+
+ return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
+
+ @staticmethod
+ def _extract_child_with_type(parent, t):
+ for c in parent['children']:
+ if c.get('type') == t:
+ return c
+
+ def _extract_mgid(self, webpage):
+ try:
+ # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
+ # or http://media.mtvnservices.com/{mgid}
+ og_url = self._og_search_video_url(webpage)
+ mgid = url_basename(og_url)
+ if mgid.endswith('.swf'):
+ mgid = mgid[:-4]
+ except RegexNotFoundError:
+ mgid = None
+
+ if mgid is None or ':' not in mgid:
+ mgid = self._search_regex(
+ [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
+ webpage, 'mgid', default=None)
+
+ if not mgid:
+ sm4_embed = self._html_search_meta(
+ 'sm4:video:embed', webpage, 'sm4 embed', default='')
+ mgid = self._search_regex(
+ r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)
+
+ if not mgid:
+ mgid = self._extract_triforce_mgid(webpage)
+
+ if not mgid:
+ data = self._parse_json(self._search_regex(
+ r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
+ main_container = self._extract_child_with_type(data, 'MainContainer')
+ ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
+ video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
+ mgid = video_player['props']['media']['video']['config']['uri']
+
+ return mgid
+
+ def _real_extract(self, url):
+ title = url_basename(url)
+ webpage = self._download_webpage(url, title)
+ mgid = self._extract_mgid(webpage)
+ videos_info = self._get_videos_info(mgid)
+ return videos_info
+
+
+class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtvservices:embedded'
+ _VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)'
+
+ _TEST = {
+ # From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/
+ 'url': 'http://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906/cp~vid%3D1043906%26uri%3Dmgid%3Auma%3Avideo%3Amtv.com%3A1043906',
+ 'md5': 'cb349b21a7897164cede95bd7bf3fbb9',
+ 'info_dict': {
+ 'id': '1043906',
+ 'ext': 'mp4',
+ 'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
+ 'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
+ 'timestamp': 1400126400,
+ 'upload_date': '20140515',
+ },
+ }
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _get_feed_url(self, uri):
+ video_id = self._id_from_uri(uri)
+ config = self._download_json(
+ 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id)
+ return self._remove_template_parameter(config['feedWithQueryParams'])
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ mgid = mobj.group('mgid')
+ return self._get_videos_info(mgid)
+
+
+class MTVIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtv'
+ _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
+ _FEED_URL = 'http://www.mtv.com/feeds/mrss/'
+
+ _TESTS = [{
+ 'url': 'http://www.mtv.com/video-clips/vl8qof/unlocking-the-truth-trailer',
+ 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
+ 'info_dict': {
+ 'id': '5e14040d-18a4-47c4-a582-43ff602de88e',
+ 'ext': 'mp4',
+ 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer',
+ 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.',
+ 'timestamp': 1468846800,
+ 'upload_date': '20160718',
+ },
+ }, {
+ 'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mtv.com/episodes/g8xu7q/teen-mom-2-breaking-the-wall-season-7-ep-713',
+ 'only_matching': True,
+ }]
+
+
+class MTVJapanIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtvjapan'
+ _VALID_URL = r'https?://(?:www\.)?mtvjapan\.com/videos/(?P<id>[0-9a-z]+)'
+
+ _TEST = {
+ 'url': 'http://www.mtvjapan.com/videos/prayht/fresh-info-cadillac-escalade',
+ 'info_dict': {
+ 'id': 'bc01da03-6fe5-4284-8880-f291f4e368f5',
+ 'ext': 'mp4',
+ 'title': '【Fresh Info】Cadillac ESCALADE Sport Edition',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+ _GEO_COUNTRIES = ['JP']
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+
+ def _get_feed_query(self, uri):
+ return {
+ 'arcEp': 'mtvjapan.com',
+ 'mgid': uri,
+ }
+
+
+class MTVVideoIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtv:video'
+ _VALID_URL = r'''(?x)^https?://
+ (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
+ m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))'''
+
+ _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
+ 'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
+ 'info_dict': {
+ 'id': '853555',
+ 'ext': 'mp4',
+ 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
+ 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
+ 'timestamp': 1352610000,
+ 'upload_date': '20121111',
+ },
+ },
+ ]
+
+ def _get_thumbnail_url(self, uri, itemdoc):
+ return 'http://mtv.mtvnimages.com/uri/' + uri
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('videoid')
+ uri = mobj.groupdict().get('mgid')
+ if uri is None:
+ webpage = self._download_webpage(url, video_id)
+
+ # Some videos come from Vevo.com
+ m_vevo = re.search(
+ r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage)
+ if m_vevo:
+ vevo_id = m_vevo.group(1)
+ self.to_screen('Vevo video detected: %s' % vevo_id)
+ return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
+
+ uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri')
+ return self._get_videos_info(uri)
+
+
+class MTVDEIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtv.de'
+ _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P<id>[0-9a-z]+)'
+ _TESTS = [{
+ 'url': 'http://www.mtv.de/musik/videoclips/2gpnv7/Traum',
+ 'info_dict': {
+ 'id': 'd5d472bc-f5b7-11e5-bffd-a4badb20dab5',
+ 'ext': 'mp4',
+ 'title': 'Traum',
+ 'description': 'Traum',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Blocked at Travis CI',
+ }, {
+ # mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
+ 'url': 'http://www.mtv.de/folgen/6b1ylu/teen-mom-2-enthuellungen-S5-F1',
+ 'info_dict': {
+ 'id': '1e5a878b-31c5-11e7-a442-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'Teen Mom 2',
+ 'description': 'md5:dc65e357ef7e1085ed53e9e9d83146a7',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Blocked at Travis CI',
+ }, {
+ 'url': 'http://www.mtv.de/news/glolix/77491-mtv-movies-spotlight--pixels--teil-3',
+ 'info_dict': {
+ 'id': 'local_playlist-4e760566473c4c8c5344',
+ 'ext': 'mp4',
+ 'title': 'Article_mtv-movies-spotlight-pixels-teil-3_short-clips_part1',
+ 'description': 'MTV Movies Supercut',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Das Video kann zur Zeit nicht abgespielt werden.',
+ }]
+ _GEO_COUNTRIES = ['DE']
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+
+ def _get_feed_query(self, uri):
+ return {
+ 'arcEp': 'mtv.de',
+ 'mgid': uri,
+ }
diff --git a/hypervideo_dl/extractor/muenchentv.py b/hypervideo_dl/extractor/muenchentv.py
new file mode 100644
index 0000000..2cc2bf2
--- /dev/null
+++ b/hypervideo_dl/extractor/muenchentv.py
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ js_to_json,
+)
+
+
+class MuenchenTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?muenchen\.tv/livestream'
+ IE_DESC = 'münchen.tv'
+ _TEST = {
+ 'url': 'http://www.muenchen.tv/livestream/',
+ 'info_dict': {
+ 'id': '5334',
+ 'display_id': 'live',
+ 'ext': 'mp4',
+ 'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ 'thumbnail': r're:^https?://.*\.jpg$'
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = 'live'
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._live_title(self._og_search_title(webpage))
+
+ data_js = self._search_regex(
+ r'(?s)\nplaylist:\s*(\[.*?}\]),',
+ webpage, 'playlist configuration')
+ data_json = js_to_json(data_js)
+ data = json.loads(data_json)[0]
+
+ video_id = data['mediaid']
+ thumbnail = data.get('image')
+
+ formats = []
+ for format_num, s in enumerate(data['sources']):
+ ext = determine_ext(s['file'], None)
+ label_str = s.get('label')
+ if label_str is None:
+ label_str = '_%d' % format_num
+
+ if ext is None:
+ format_id = label_str
+ else:
+ format_id = '%s-%s' % (ext, label_str)
+
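+ # the source 'label' appears to carry the bitrate in kbit/s,
+ # so it doubles as tbr below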
+ formats.append({
+ 'url': s['file'],
+ 'tbr': int_or_none(s.get('label')),
+ 'ext': 'mp4',
+ 'format_id': format_id,
+ 'preference': -100 if '.smil' in s['file'] else 0,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'is_live': True,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/mwave.py b/hypervideo_dl/extractor/mwave.py
new file mode 100644
index 0000000..a672765
--- /dev/null
+++ b/hypervideo_dl/extractor/mwave.py
@@ -0,0 +1,90 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ parse_duration,
+)
+
+
+class MwaveIE(InfoExtractor):
+ _VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
+ _URL_TEMPLATE = 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=%s'
+ _TESTS = [{
+ 'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
+ # md5 is unstable
+ 'info_dict': {
+ 'id': '168859',
+ 'ext': 'flv',
+ 'title': '[M COUNTDOWN] SISTAR - SHAKE IT',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'M COUNTDOWN',
+ 'duration': 206,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'http://mwave.interest.me/en/mnettv/videodetail.m?searchVideoDetailVO.clip_id=176199',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ vod_info = self._download_json(
+ 'http://mwave.interest.me/onair/vod_info.m?vodtype=CL&sectorid=&endinfo=Y&id=%s' % video_id,
+ video_id, 'Download vod JSON')
+
+ formats = []
+ for num, cdn_info in enumerate(vod_info['cdn']):
+ stream_url = cdn_info.get('url')
+ if not stream_url:
+ continue
+ stream_name = cdn_info.get('name') or compat_str(num)
+ f4m_stream = self._download_json(
+ stream_url, video_id,
+ 'Download %s stream JSON' % stream_name)
+ f4m_url = f4m_stream.get('fileurl')
+ if not f4m_url:
+ continue
+ formats.extend(
+ self._extract_f4m_formats(f4m_url + '&hdcore=3.0.3', video_id, f4m_id=stream_name))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': vod_info['title'],
+ 'thumbnail': vod_info.get('cover'),
+ 'uploader': vod_info.get('program_title'),
+ 'duration': parse_duration(vod_info.get('time')),
+ 'view_count': int_or_none(vod_info.get('hit')),
+ 'formats': formats,
+ }
+
+
+class MwaveMeetGreetIE(InfoExtractor):
+ _VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?meetgreet/view/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://mwave.interest.me/meetgreet/view/256',
+ 'info_dict': {
+ 'id': '173294',
+ 'ext': 'flv',
+ 'title': '[MEET&GREET] Park BoRam',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Mwave',
+ 'duration': 3634,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'http://mwave.interest.me/en/meetgreet/view/256',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ clip_id = self._html_search_regex(
+ r'<iframe[^>]+src="/mnettv/ifr_clip\.m\?searchVideoDetailVO\.clip_id=(\d+)',
+ webpage, 'clip ID')
+ clip_url = MwaveIE._URL_TEMPLATE % clip_id
+ return self.url_result(clip_url, 'Mwave', clip_id)
diff --git a/hypervideo_dl/extractor/mychannels.py b/hypervideo_dl/extractor/mychannels.py
new file mode 100644
index 0000000..b1ffe78
--- /dev/null
+++ b/hypervideo_dl/extractor/mychannels.py
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class MyChannelsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mychannels\.com/.*(?P<id_type>video|production)_id=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://mychannels.com/missholland/miss-holland?production_id=3416',
+ 'md5': 'b8993daad4262dd68d89d651c0c52c45',
+ 'info_dict': {
+ 'id': 'wUUDZZep6vQD',
+ 'ext': 'mp4',
+ 'title': 'Miss Holland joins VOTE LEAVE',
+ 'description': 'Miss Holland | #13 Not a potato',
+ 'uploader': 'Miss Holland',
+ }
+ }
+
+ def _real_extract(self, url):
+ id_type, url_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, url_id)
+ video_data = self._html_search_regex(r'<div([^>]+data-%s-id="%s"[^>]+)>' % (id_type, url_id), webpage, 'video data')
+
+ def extract_data_val(attr, fatal=False):
+ return self._html_search_regex(r'data-%s\s*=\s*"([^"]+)"' % attr, video_data, attr, fatal=fatal)
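+ # prefer the explicit data-minoto-id attribute; failing that, the Minoto
+ # clip id is assumed to be the /id/<...> path segment of data-video-src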
+ minoto_id = extract_data_val('minoto-id') or self._search_regex(r'/id/([a-zA-Z0-9]+)', extract_data_val('video-src', True), 'minoto id')
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'minoto:%s' % minoto_id,
+ 'id': url_id,
+ 'title': extract_data_val('title', True),
+ 'description': extract_data_val('description'),
+ 'thumbnail': extract_data_val('image'),
+ 'uploader': extract_data_val('channel'),
+ }
diff --git a/hypervideo_dl/extractor/myspace.py b/hypervideo_dl/extractor/myspace.py
new file mode 100644
index 0000000..e164d59
--- /dev/null
+++ b/hypervideo_dl/extractor/myspace.py
@@ -0,0 +1,212 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class MySpaceIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ myspace\.com/[^/]+/
+ (?P<mediatype>
+ video/[^/]+/(?P<video_id>\d+)|
+ music/song/[^/?#&]+-(?P<song_id>\d+)-\d+(?:[/?#&]|$)
+ )
+ '''
+
+ _TESTS = [{
+ 'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
+ 'md5': '9c1483c106f4a695c47d2911feed50a7',
+ 'info_dict': {
+ 'id': '109594919',
+ 'ext': 'mp4',
+ 'title': 'Little Big Town',
+ 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
+ 'uploader': 'Five Minutes to the Stage',
+ 'uploader_id': 'fiveminutestothestage',
+ 'timestamp': 1414108751,
+ 'upload_date': '20141023',
+ },
+ }, {
+ # songs
+ 'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
+ 'md5': '1d7ee4604a3da226dd69a123f748b262',
+ 'info_dict': {
+ 'id': '93388656',
+ 'ext': 'm4a',
+ 'title': 'Of weakened soul...',
+ 'uploader': 'Killsorrow',
+ 'uploader_id': 'killsorrow',
+ },
+ }, {
+ 'add_ie': ['Youtube'],
+ 'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
+ 'info_dict': {
+ 'id': 'xqds0B_meys',
+ 'ext': 'webm',
+ 'title': 'Three Days Grace - Animal I Have Become',
+ 'description': 'md5:8bd86b3693e72a077cf863a8530c54bb',
+ 'uploader': 'ThreeDaysGraceVEVO',
+ 'uploader_id': 'ThreeDaysGraceVEVO',
+ 'upload_date': '20091002',
+ },
+ }, {
+ 'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://myspace.com/thelargemouthbassband/music/song/02-pure-eyes.mp3-94422330-105113388',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('video_id') or mobj.group('song_id')
+ is_song = mobj.group('mediatype').startswith('music/song')
+ webpage = self._download_webpage(url, video_id)
+ player_url = self._search_regex(
+ r'videoSwf":"([^"?]*)', webpage, 'player URL', fatal=False)
+
+ def formats_from_stream_urls(stream_url, hls_stream_url, http_stream_url, width=None, height=None):
+ formats = []
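+ # song pages serve audio-only streams, so mark the video codec as absent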
+ vcodec = 'none' if is_song else None
+ if hls_stream_url:
+ formats.append({
+ 'format_id': 'hls',
+ 'url': hls_stream_url,
+ 'protocol': 'm3u8_native',
+ 'ext': 'm4a' if is_song else 'mp4',
+ 'vcodec': vcodec,
+ })
+ if stream_url and player_url:
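+ # the stream URL appears to bundle the RTMP base URL and the play
+ # path in a single ';'-separated string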
+ rtmp_url, play_path = stream_url.split(';', 1)
+ formats.append({
+ 'format_id': 'rtmp',
+ 'url': rtmp_url,
+ 'play_path': play_path,
+ 'player_url': player_url,
+ 'protocol': 'rtmp',
+ 'ext': 'flv',
+ 'width': width,
+ 'height': height,
+ 'vcodec': vcodec,
+ })
+ if http_stream_url:
+ formats.append({
+ 'format_id': 'http',
+ 'url': http_stream_url,
+ 'width': width,
+ 'height': height,
+ 'vcodec': vcodec,
+ })
+ return formats
+
+ if is_song:
+ # songs don't store any useful info in the 'context' variable
+ song_data = self._search_regex(
+ r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
+ webpage, 'song_data', default=None, group=0)
+ if song_data is None:
+ # some songs in an album are not playable
+ self.report_warning(
+ '%s: No downloadable song on this page' % video_id)
+ return
+
+ def search_data(name):
+ return self._search_regex(
+ r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
+ song_data, name, default='', group='data')
+ formats = formats_from_stream_urls(
+ search_data('stream-url'), search_data('hls-stream-url'),
+ search_data('http-stream-url'))
+ if not formats:
+ vevo_id = search_data('vevo-id')
+ youtube_id = search_data('youtube-id')
+ if vevo_id:
+ self.to_screen('Vevo video detected: %s' % vevo_id)
+ return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
+ elif youtube_id:
+ self.to_screen('Youtube video detected: %s' % youtube_id)
+ return self.url_result(youtube_id, ie='Youtube')
+ else:
+ raise ExtractorError(
+ 'Found song but don\'t know how to download it')
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'uploader': search_data('artist-name'),
+ 'uploader_id': search_data('artist-username'),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': int_or_none(search_data('duration')),
+ 'formats': formats,
+ }
+ else:
+ video = self._parse_json(self._search_regex(
+ r'context = ({.*?});', webpage, 'context'),
+ video_id)['video']
+ formats = formats_from_stream_urls(
+ video.get('streamUrl'), video.get('hlsStreamUrl'),
+ video.get('mp4StreamUrl'), int_or_none(video.get('width')),
+ int_or_none(video.get('height')))
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': video['title'],
+ 'description': video.get('description'),
+ 'thumbnail': video.get('imageUrl'),
+ 'uploader': video.get('artistName'),
+ 'uploader_id': video.get('artistUsername'),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': parse_iso8601(video.get('dateAdded')),
+ 'formats': formats,
+ }
+
+
+class MySpaceAlbumIE(InfoExtractor):
+ IE_NAME = 'MySpace:album'
+ _VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
+ 'info_dict': {
+ 'title': 'Transmissions',
+ 'id': '19455773',
+ },
+ 'playlist_count': 14,
+ 'skip': 'this album is only available in some countries',
+ }, {
+ 'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
+ 'info_dict': {
+ 'title': 'The Demo',
+ 'id': '18596029',
+ },
+ 'playlist_count': 5,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ playlist_id = mobj.group('id')
+ display_id = mobj.group('title') + playlist_id
+ webpage = self._download_webpage(url, display_id)
+ tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage)
+ if not tracks_paths:
+ raise ExtractorError(
+ '%s: No songs found, try using a proxy' % display_id,
+ expected=True)
+ entries = [
+ self.url_result(t_path, ie=MySpaceIE.ie_key())
+ for t_path in tracks_paths]
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'display_id': display_id,
+ 'title': self._og_search_title(webpage),
+ 'entries': entries,
+ }
diff --git a/hypervideo_dl/extractor/myspass.py b/hypervideo_dl/extractor/myspass.py
new file mode 100644
index 0000000..db7ebc9
--- /dev/null
+++ b/hypervideo_dl/extractor/myspass.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ xpath_text,
+)
+
+
+class MySpassIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?myspass\.de/([^/]+/)*(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
+ 'md5': '0b49f4844a068f8b33f4b7c88405862b',
+ 'info_dict': {
+ 'id': '11741',
+ 'ext': 'mp4',
+ 'description': 'Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?',
+ 'title': '17.02.2013 - Die Highlights, Teil 2',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ metadata = self._download_xml(
+ 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=' + video_id,
+ video_id)
+
+ title = xpath_text(metadata, 'title', fatal=True)
+ video_url = xpath_text(metadata, 'url_flv', 'download url', True)
+ video_id_int = int(video_id)
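+ # quirk inherited from the original extractor: numeric path components
+ # larger than the video id appear to be the id multiplied by the real
+ # value, so integer division restores the intended component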
+ for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups():
+ group_int = int(group)
+ if group_int > video_id_int:
+ video_url = video_url.replace(
+ group, compat_str(group_int // video_id_int))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': xpath_text(metadata, 'imagePreview'),
+ 'description': xpath_text(metadata, 'description'),
+ 'duration': parse_duration(xpath_text(metadata, 'duration')),
+ 'series': xpath_text(metadata, 'format'),
+ 'season_number': int_or_none(xpath_text(metadata, 'season')),
+ 'season_id': xpath_text(metadata, 'season_id'),
+ 'episode': title,
+ 'episode_number': int_or_none(xpath_text(metadata, 'episode')),
+ }
diff --git a/hypervideo_dl/extractor/myvi.py b/hypervideo_dl/extractor/myvi.py
new file mode 100644
index 0000000..75d2863
--- /dev/null
+++ b/hypervideo_dl/extractor/myvi.py
@@ -0,0 +1,111 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .vimple import SprutoBaseIE
+
+
+class MyviIE(SprutoBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://
+ (?:www\.)?
+ myvi\.
+ (?:
+ (?:ru/player|tv)/
+ (?:
+ (?:
+ embed/html|
+ flash|
+ api/Video/Get
+ )/|
+ content/preloader\.swf\?.*\bid=
+ )|
+ ru/watch/
+ )|
+ myvi:
+ )
+ (?P<id>[\da-zA-Z_-]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
+ 'md5': '571bbdfba9f9ed229dc6d34cc0f335bf',
+ 'info_dict': {
+ 'id': 'f16b2bbd-cde8-481c-a981-7cd48605df43',
+ 'ext': 'mp4',
+ 'title': 'хозяин жизни',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 25,
+ },
+ }, {
+ 'url': 'http://myvi.ru/player/content/preloader.swf?id=oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wOYf1WFpPfc_bWTKGVf_Zafr0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://myvi.ru/player/api/Video/Get/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://myvi.tv/embed/html/oTGTNWdyz4Zwy_u1nraolwZ1odenTd9WkTnRfIL9y8VOgHYqOHApE575x4_xxS9Vn0?ap=0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.myvi.ru/watch/YwbqszQynUaHPn_s82sx0Q2',
+ 'only_matching': True,
+ }, {
+ 'url': 'myvi:YwbqszQynUaHPn_s82sx0Q2',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def _extract_url(cls, webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//myvi\.(?:ru/player|tv)/(?:embed/html|flash)/[^"]+)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ spruto = self._download_json(
+ 'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData']
+
+ return self._extract_spruto(spruto, video_id)
+
+
+class MyviEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?myvi\.tv/(?:[^?]+\?.*?\bv=|embed/)(?P<id>[\da-z]+)'
+ _TESTS = [{
+ 'url': 'https://www.myvi.tv/embed/ccdqic3wgkqwpb36x9sxg43t4r',
+ 'info_dict': {
+ 'id': 'b3ea0663-3234-469d-873e-7fecf36b31d1',
+ 'ext': 'mp4',
+ 'title': 'Твоя (original song).mp4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 277,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.myvi.tv/idmi6o?v=ccdqic3wgkqwpb36x9sxg43t4r#watch',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if MyviIE.suitable(url) else super(MyviEmbedIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://www.myvi.tv/embed/%s' % video_id, video_id)
+
+ myvi_id = self._search_regex(
+ r'CreatePlayer\s*\(\s*["\'].*?\bv=([\da-zA-Z_]+)',
+ webpage, 'video id')
+
+ return self.url_result('myvi:%s' % myvi_id, ie=MyviIE.ie_key())
diff --git a/hypervideo_dl/extractor/myvidster.py b/hypervideo_dl/extractor/myvidster.py
new file mode 100644
index 0000000..2117d30
--- /dev/null
+++ b/hypervideo_dl/extractor/myvidster.py
@@ -0,0 +1,29 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MyVidsterIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/'
+
+ _TEST = {
+ 'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making',
+ 'md5': '95296d0231c1363222c3441af62dc4ca',
+ 'info_dict': {
+ 'id': '3685814',
+ 'title': 'md5:7d8427d6d02c4fbcef50fe269980c749',
+ 'upload_date': '20141027',
+ 'uploader': 'utkualp',
+ 'ext': 'mp4',
+ 'age_limit': 18,
+ },
+ 'add_ie': ['XHamster'],
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ return self.url_result(self._html_search_regex(
+ r'rel="videolink" href="(?P<real_url>.*)">',
+ webpage, 'real video url'))
diff --git a/hypervideo_dl/extractor/nationalgeographic.py b/hypervideo_dl/extractor/nationalgeographic.py
new file mode 100644
index 0000000..ee12e2b
--- /dev/null
+++ b/hypervideo_dl/extractor/nationalgeographic.py
@@ -0,0 +1,82 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .fox import FOXIE
+from ..utils import (
+ smuggle_url,
+ url_basename,
+)
+
+
+class NationalGeographicVideoIE(InfoExtractor):
+ IE_NAME = 'natgeo:video'
+ _VALID_URL = r'https?://video\.nationalgeographic\.com/.*?'
+
+ _TESTS = [
+ {
+ 'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
+ 'md5': '730855d559abbad6b42c2be1fa584917',
+ 'info_dict': {
+ 'id': '0000014b-70a1-dd8c-af7f-f7b559330001',
+ 'ext': 'mp4',
+ 'title': 'Mating Crabs Busted by Sharks',
+ 'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
+ 'timestamp': 1423523799,
+ 'upload_date': '20150209',
+ 'uploader': 'NAGS',
+ },
+ 'add_ie': ['ThePlatform'],
+ },
+ {
+ 'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws',
+ 'md5': '6a3105eb448c070503b3105fb9b320b5',
+ 'info_dict': {
+ 'id': 'ngc-I0IauNSWznb_UV008GxSbwY35BZvgi2e',
+ 'ext': 'mp4',
+ 'title': 'The Real Jaws',
+ 'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6',
+ 'timestamp': 1433772632,
+ 'upload_date': '20150608',
+ 'uploader': 'NAGS',
+ },
+ 'add_ie': ['ThePlatform'],
+ },
+ ]
+
+ def _real_extract(self, url):
+ name = url_basename(url)
+
+ webpage = self._download_webpage(url, name)
+ guid = self._search_regex(
+ r'id="(?:videoPlayer|player-container)"[^>]+data-guid="([^"]+)"',
+ webpage, 'guid')
+
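+ # hand off to ThePlatform; force_smil_url is smuggled so the URL is
+ # treated as a direct SMIL feed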
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'url': smuggle_url(
+ 'http://link.theplatform.com/s/ngs/media/guid/2423130747/%s?mbr=true' % guid,
+ {'force_smil_url': True}),
+ 'id': guid,
+ }
+
+
+class NationalGeographicTVIE(FOXIE):
+ _VALID_URL = r'https?://(?:www\.)?nationalgeographic\.com/tv/watch/(?P<id>[\da-fA-F]+)'
+ _TESTS = [{
+ 'url': 'https://www.nationalgeographic.com/tv/watch/6a875e6e734b479beda26438c9f21138/',
+ 'info_dict': {
+ 'id': '6a875e6e734b479beda26438c9f21138',
+ 'ext': 'mp4',
+ 'title': 'Why Nat Geo? Valley of the Boom',
+ 'description': 'The lives of prominent figures in the tech world, including their friendships, rivalries, victories and failures.',
+ 'timestamp': 1542662458,
+ 'upload_date': '20181119',
+ 'age_limit': 14,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _HOME_PAGE_URL = 'https://www.nationalgeographic.com/tv/'
+ _API_KEY = '238bb0a0c2aba67922c48709ce0c06fd'
diff --git a/hypervideo_dl/extractor/naver.py b/hypervideo_dl/extractor/naver.py
new file mode 100644
index 0000000..61fc591
--- /dev/null
+++ b/hypervideo_dl/extractor/naver.py
@@ -0,0 +1,166 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ dict_get,
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ try_get,
+ update_url_query,
+)
+
+
+class NaverBaseIE(InfoExtractor):
+ _CAPTION_EXT_RE = r'\.(?:ttml|vtt)'
+
+ def _extract_video_info(self, video_id, vid, key):
+ video_data = self._download_json(
+ 'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
+ video_id, query={
+ 'key': key,
+ })
+ meta = video_data['meta']
+ title = meta['subject']
+ formats = []
+ get_list = lambda x: try_get(video_data, lambda y: y[x + 's']['list'], list) or []
+
+ def extract_formats(streams, stream_type, query={}):
+ for stream in streams:
+ stream_url = stream.get('source')
+ if not stream_url:
+ continue
+ stream_url = update_url_query(stream_url, query)
+ encoding_option = stream.get('encodingOption', {})
+ bitrate = stream.get('bitrate', {})
+ formats.append({
+ 'format_id': '%s_%s' % (stream.get('type') or stream_type, dict_get(encoding_option, ('name', 'id'))),
+ 'url': stream_url,
+ 'width': int_or_none(encoding_option.get('width')),
+ 'height': int_or_none(encoding_option.get('height')),
+ 'vbr': int_or_none(bitrate.get('video')),
+ 'abr': int_or_none(bitrate.get('audio')),
+ 'filesize': int_or_none(stream.get('size')),
+ 'protocol': 'm3u8_native' if stream_type == 'HLS' else None,
+ })
+
+ extract_formats(get_list('video'), 'H264')
+ for stream_set in video_data.get('streams', []):
+ query = {}
+ for param in stream_set.get('keys', []):
+ query[param['name']] = param['value']
+ stream_type = stream_set.get('type')
+ videos = stream_set.get('videos')
+ if videos:
+ extract_formats(videos, stream_type, query)
+ elif stream_type == 'HLS':
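+ # no per-quality video list was given; fall back to expanding
+ # the HLS master playlist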
+ stream_url = stream_set.get('source')
+ if not stream_url:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ update_url_query(stream_url, query), video_id,
+ 'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False))
+ self._sort_formats(formats)
+
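+ # each caption source is assumed to exist in both TTML and VTT
+ # variants that differ only by extension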
+ replace_ext = lambda x, y: re.sub(self._CAPTION_EXT_RE, '.' + y, x)
+
+ def get_subs(caption_url):
+ if re.search(self._CAPTION_EXT_RE, caption_url):
+ return [{
+ 'url': replace_ext(caption_url, 'ttml'),
+ }, {
+ 'url': replace_ext(caption_url, 'vtt'),
+ }]
+ else:
+ return [{'url': caption_url}]
+
+ automatic_captions = {}
+ subtitles = {}
+ for caption in get_list('caption'):
+ caption_url = caption.get('source')
+ if not caption_url:
+ continue
+ sub_dict = automatic_captions if caption.get('type') == 'auto' else subtitles
+ sub_dict.setdefault(dict_get(caption, ('locale', 'language')), []).extend(get_subs(caption_url))
+
+ user = meta.get('user', {})
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'automatic_captions': automatic_captions,
+ 'thumbnail': try_get(meta, lambda x: x['cover']['source']),
+ 'view_count': int_or_none(meta.get('count')),
+ 'uploader_id': user.get('id'),
+ 'uploader': user.get('name'),
+ 'uploader_url': user.get('url'),
+ }
+
+
+class NaverIE(NaverBaseIE):
+ _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/(?:v|embed)/(?P<id>\d+)'
+ _GEO_BYPASS = False
+ _TESTS = [{
+ 'url': 'http://tv.naver.com/v/81652',
+ 'info_dict': {
+ 'id': '81652',
+ 'ext': 'mp4',
+ 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
+ 'description': '메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
+ 'timestamp': 1378200754,
+ 'upload_date': '20130903',
+ 'uploader': '메가스터디, 합격불변의 법칙',
+ 'uploader_id': 'megastudy',
+ },
+ }, {
+ 'url': 'http://tv.naver.com/v/395837',
+ 'md5': '8a38e35354d26a17f73f4e90094febd3',
+ 'info_dict': {
+ 'id': '395837',
+ 'ext': 'mp4',
+ 'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
+ 'description': 'md5:eb6aca9d457b922e43860a2a2b1984d3',
+ 'timestamp': 1432030253,
+ 'upload_date': '20150519',
+ 'uploader': '4가지쇼 시즌2',
+ 'uploader_id': 'wrappinguser29',
+ },
+ 'skip': 'Georestricted',
+ }, {
+ 'url': 'http://tvcast.naver.com/v/81652',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ content = self._download_json(
+ 'https://tv.naver.com/api/json/v/' + video_id,
+ video_id, headers=self.geo_verification_headers())
+ player_info_json = content.get('playerInfoJson') or {}
+ current_clip = player_info_json.get('currentClip') or {}
+
+ vid = current_clip.get('videoId')
+ in_key = current_clip.get('inKey')
+
+ if not vid or not in_key:
+ player_auth = try_get(player_info_json, lambda x: x['playerOption']['auth'])
+ if player_auth == 'notCountry':
+ self.raise_geo_restricted(countries=['KR'])
+ elif player_auth == 'notLogin':
+ self.raise_login_required()
+ raise ExtractorError('Unable to extract vid and key')
+ info = self._extract_video_info(video_id, vid, in_key)
+ info.update({
+ 'description': clean_html(current_clip.get('description')),
+ 'timestamp': int_or_none(current_clip.get('firstExposureTime'), 1000),
+ 'duration': parse_duration(current_clip.get('displayPlayTime')),
+ 'like_count': int_or_none(current_clip.get('recommendPoint')),
+ 'age_limit': 19 if current_clip.get('adult') else None,
+ })
+ return info
diff --git a/hypervideo_dl/extractor/nba.py b/hypervideo_dl/extractor/nba.py
new file mode 100644
index 0000000..fbc7ada
--- /dev/null
+++ b/hypervideo_dl/extractor/nba.py
@@ -0,0 +1,428 @@
+from __future__ import unicode_literals
+
+import functools
+import re
+
+from .turner import TurnerBaseIE
+from ..compat import (
+ compat_parse_qs,
+ compat_str,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ int_or_none,
+ merge_dicts,
+ OnDemandPagedList,
+ parse_duration,
+ parse_iso8601,
+ try_get,
+ update_url_query,
+ urljoin,
+)
+
+
+class NBACVPBaseIE(TurnerBaseIE):
+ def _extract_nba_cvp_info(self, path, video_id, fatal=False):
+ return self._extract_cvp_info(
+ 'http://secure.nba.com/%s' % path, video_id, {
+ 'default': {
+ 'media_src': 'http://nba.cdn.turner.com/nba/big',
+ },
+ 'm3u8': {
+ 'media_src': 'http://nbavod-f.akamaihd.net',
+ },
+ }, fatal=fatal)
+
+
+class NBAWatchBaseIE(NBACVPBaseIE):
+ _VALID_URL_BASE = r'https?://(?:(?:www\.)?nba\.com(?:/watch)?|watch\.nba\.com)/'
+
+ def _extract_video(self, filter_key, filter_value):
+ video = self._download_json(
+ 'https://neulionscnbav2-a.akamaihd.net/solr/nbad_program/usersearch',
+ filter_value, query={
+ 'fl': 'description,image,name,pid,releaseDate,runtime,tags,seoName',
+ 'q': filter_key + ':' + filter_value,
+ 'wt': 'json',
+ })['response']['docs'][0]
+
+ video_id = str(video['pid'])
+ title = video['name']
+
+ formats = []
+ m3u8_url = (self._download_json(
+ 'https://watch.nba.com/service/publishpoint', video_id, query={
+ 'type': 'video',
+ 'format': 'json',
+ 'id': video_id,
+ }, headers={
+ 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
+ }, fatal=False) or {}).get('path')
+ if m3u8_url:
+ m3u8_formats = self._extract_m3u8_formats(
+ re.sub(r'_(?:pc|iphone)\.', '.', m3u8_url), video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(m3u8_formats)
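+ # each HLS rendition seems to have a progressive counterpart at the
+ # same URL minus the .m3u8 suffix, so synthesize matching http formats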
+ for f in m3u8_formats:
+ http_f = f.copy()
+ http_f.update({
+ 'format_id': http_f['format_id'].replace('hls-', 'http-'),
+ 'protocol': 'http',
+ 'url': http_f['url'].replace('.m3u8', ''),
+ })
+ formats.append(http_f)
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': urljoin('https://nbadsdmt.akamaized.net/media/nba/nba/thumbs/', video.get('image')),
+ 'description': video.get('description'),
+ 'duration': int_or_none(video.get('runtime')),
+ 'timestamp': parse_iso8601(video.get('releaseDate')),
+ 'tags': video.get('tags'),
+ }
+
+ seo_name = video.get('seoName')
+ if seo_name and re.search(r'\d{4}/\d{2}/\d{2}/', seo_name):
+ base_path = ''
+ if seo_name.startswith('teams/'):
+ base_path += seo_name.split('/')[1] + '/'
+ base_path += 'video/'
+ cvp_info = self._extract_nba_cvp_info(
+ base_path + seo_name + '.xml', video_id, False)
+ if cvp_info:
+ formats.extend(cvp_info['formats'])
+ info = merge_dicts(info, cvp_info)
+
+ self._sort_formats(formats)
+ info['formats'] = formats
+ return info
+
+
+class NBAWatchEmbedIE(NBAWatchBaseIE):
+ IE_NAME = 'nba:watch:embed'
+ _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://watch.nba.com/embed?id=659395',
+ 'md5': 'b7e3f9946595f4ca0a13903ce5edd120',
+ 'info_dict': {
+ 'id': '659395',
+ 'ext': 'mp4',
+ 'title': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
+ 'description': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
+ 'timestamp': 1492228800,
+ 'upload_date': '20170415',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self._extract_video('pid', video_id)
+
+
+class NBAWatchIE(NBAWatchBaseIE):
+ IE_NAME = 'nba:watch'
+ _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
+ 'md5': '9d902940d2a127af3f7f9d2f3dc79c96',
+ 'info_dict': {
+ 'id': '70946',
+ 'ext': 'mp4',
+ 'title': 'Thunder vs. Nets',
+ 'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
+ 'duration': 181,
+ 'timestamp': 1354597200,
+ 'upload_date': '20121204',
+ },
+ }, {
+ 'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
+ 'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
+ 'info_dict': {
+ 'id': '330865',
+ 'ext': 'mp4',
+ 'title': 'Hawks vs. Cavaliers Game 1',
+ 'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
+ 'duration': 228,
+ 'timestamp': 1432094400,
+ 'upload_date': '20150521',
+ },
+ }, {
+ 'url': 'http://watch.nba.com/nba/video/channels/nba_tv/2015/06/11/YT_go_big_go_home_Game4_061115',
+ 'only_matching': True,
+ }, {
+ # only CVP mp4 format available
+ 'url': 'https://watch.nba.com/video/teams/cavaliers/2012/10/15/sloan121015mov-2249106',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://watch.nba.com/video/top-100-dunks-from-the-2019-20-season?plsrc=nba&collection=2019-20-season-highlights',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ collection_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('collection', [None])[0]
+ if collection_id:
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % display_id)
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % collection_id)
+ return self.url_result(
+ 'https://www.nba.com/watch/list/collection/' + collection_id,
+ NBAWatchCollectionIE.ie_key(), collection_id)
+ return self._extract_video('seoName', display_id)
+
+
+class NBAWatchCollectionIE(NBAWatchBaseIE):
+ IE_NAME = 'nba:watch:collection'
+ _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://watch.nba.com/list/collection/season-preview-2020',
+ 'info_dict': {
+ 'id': 'season-preview-2020',
+ },
+ 'playlist_mincount': 43,
+ }]
+ _PAGE_SIZE = 100
+
+ def _fetch_page(self, collection_id, page):
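+ # OnDemandPagedList passes 0-based page indices, while the API
+ # counts pages from 1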
+ page += 1
+ videos = self._download_json(
+ 'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id,
+ collection_id, 'Downloading page %d JSON metadata' % page, query={
+ 'count': self._PAGE_SIZE,
+ 'page': page,
+ })['results']['videos']
+ for video in videos:
+ program = video.get('program') or {}
+ seo_name = program.get('seoName') or program.get('slug')
+ if not seo_name:
+ continue
+ yield {
+ '_type': 'url',
+ 'id': program.get('id'),
+ 'title': program.get('title') or video.get('title'),
+ 'url': 'https://www.nba.com/watch/video/' + seo_name,
+ 'thumbnail': video.get('image'),
+ 'description': program.get('description') or video.get('description'),
+ 'duration': parse_duration(program.get('runtimeHours')),
+ 'timestamp': parse_iso8601(video.get('releaseDate')),
+ }
+
+ def _real_extract(self, url):
+ collection_id = self._match_id(url)
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, collection_id),
+ self._PAGE_SIZE)
+ return self.playlist_result(entries, collection_id)
+
+
+class NBABaseIE(NBACVPBaseIE):
+ _VALID_URL_BASE = r'''(?x)
+ https?://(?:www\.)?nba\.com/
+ (?P<team>
+ blazers|
+ bucks|
+ bulls|
+ cavaliers|
+ celtics|
+ clippers|
+ grizzlies|
+ hawks|
+ heat|
+ hornets|
+ jazz|
+ kings|
+ knicks|
+ lakers|
+ magic|
+ mavericks|
+ nets|
+ nuggets|
+ pacers|
+ pelicans|
+ pistons|
+ raptors|
+ rockets|
+ sixers|
+ spurs|
+ suns|
+ thunder|
+ timberwolves|
+ warriors|
+ wizards
+ )
+ (?:/play\#)?/'''
+ _CHANNEL_PATH_REGEX = r'video/channel|series'
+
+ def _embed_url_result(self, team, content_id):
+ return self.url_result(update_url_query(
+ 'https://secure.nba.com/assets/amp/include/video/iframe.html', {
+ 'contentId': content_id,
+ 'team': team,
+ }), NBAEmbedIE.ie_key())
+
+ def _call_api(self, team, content_id, query, resource):
+ return self._download_json(
+ 'https://api.nba.net/2/%s/video,imported_video,wsc/' % team,
+ content_id, 'Downloading %s JSON metadata' % resource,
+ query=query, headers={
+ 'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b',
+ })['response']['result']
+
+ def _extract_video(self, video, team, extract_all=True):
+ video_id = compat_str(video['nid'])
+ team = video['brand']
+
+ info = {
+ 'id': video_id,
+ 'title': video.get('title') or video.get('headline') or video['shortHeadline'],
+ 'description': video.get('description'),
+ 'timestamp': parse_iso8601(video.get('published')),
+ }
+
+ subtitles = {}
+ captions = try_get(video, lambda x: x['videoCaptions']['sidecars'], dict) or {}
+ for caption_url in captions.values():
+ subtitles.setdefault('en', []).append({'url': caption_url})
+
+ formats = []
+ mp4_url = video.get('mp4')
+ if mp4_url:
+ formats.append({
+ 'url': mp4_url,
+ })
+
+ if extract_all:
+ source_url = video.get('videoSource')
+ if source_url and not source_url.startswith('s3://') and self._is_valid_url(source_url, video_id, 'source'):
+ formats.append({
+ 'format_id': 'source',
+ 'url': source_url,
+ 'preference': 1,
+ })
+
+ m3u8_url = video.get('m3u8')
+ if m3u8_url:
+ if '.akamaihd.net/i/' in m3u8_url:
+ formats.extend(self._extract_akamai_formats(
+ m3u8_url, video_id, {'http': 'pmd.cdn.turner.com'}))
+ else:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ content_xml = video.get('contentXml')
+ if team and content_xml:
+ cvp_info = self._extract_nba_cvp_info(
+ team + content_xml, video_id, fatal=False)
+ if cvp_info:
+ formats.extend(cvp_info['formats'])
+ subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles'])
+ info = merge_dicts(info, cvp_info)
+
+ self._sort_formats(formats)
+ else:
+ info.update(self._embed_url_result(team, video['videoId']))
+
+ info.update({
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+
+ return info
+
+ def _real_extract(self, url):
+ team, display_id = re.match(self._VALID_URL, url).groups()
+ if '/play#/' in url:
+ display_id = compat_urllib_parse_unquote(display_id)
+ else:
+ webpage = self._download_webpage(url, display_id)
+ display_id = self._search_regex(
+ self._CONTENT_ID_REGEX + r'\s*:\s*"([^"]+)"', webpage, 'video id')
+ return self._extract_url_results(team, display_id)
+
+
+class NBAEmbedIE(NBABaseIE):
+ IE_NAME = 'nba:embed'
+ _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
+ _TESTS = [{
+ 'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://secure.nba.com/assets/amp/include/video/iframe.html?contentId=2016/10/29/0021600027boschaplay7&adFree=false&profile=71&team=&videoPlayerName=LAMPCVP',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ content_id = qs['contentId'][0]
+ team = qs.get('team', [None])[0]
+ if not team:
+ return self.url_result(
+ 'https://watch.nba.com/video/' + content_id, NBAWatchIE.ie_key())
+ video = self._call_api(team, content_id, {'videoid': content_id}, 'video')[0]
+ return self._extract_video(video, team)
+
+
+class NBAIE(NBABaseIE):
+ IE_NAME = 'nba'
+ _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
+ 'info_dict': {
+ 'id': '45039',
+ 'ext': 'mp4',
+ 'title': 'AND WE BACK.',
+ 'description': 'Part 1 of our 2020-21 schedule is here! Watch our games on NBC Sports Chicago.',
+ 'duration': 94,
+ 'timestamp': 1607112000,
+ 'upload_date': '20201218',
+ },
+ }, {
+ 'url': 'https://www.nba.com/bucks/play#/video/teams%2Fbucks%2F2020%2F12%2F17%2F64860%2F1608252863446-Op_Dream_16x9-64860',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.nba.com/bucks/play#/video/wsc%2Fteams%2F2787C911AA1ACD154B5377F7577CCC7134B2A4B0',
+ 'only_matching': True,
+ }]
+ _CONTENT_ID_REGEX = r'videoID'
+
+ def _extract_url_results(self, team, content_id):
+ return self._embed_url_result(team, content_id)
+
+
+class NBAChannelIE(NBABaseIE):
+ IE_NAME = 'nba:channel'
+ _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.nba.com/blazers/video/channel/summer_league',
+ 'info_dict': {
+ 'title': 'Summer League',
+ },
+ 'playlist_mincount': 138,
+ }, {
+ 'url': 'https://www.nba.com/bucks/play#/series/On%20This%20Date',
+ 'only_matching': True,
+ }]
+ _CONTENT_ID_REGEX = r'videoSubCategory'
+ _PAGE_SIZE = 100
+
+ def _fetch_page(self, team, channel, page):
+ results = self._call_api(team, channel, {
+ 'channels': channel,
+ 'count': self._PAGE_SIZE,
+ 'offset': page * self._PAGE_SIZE,
+ }, 'page %d' % (page + 1))
+ for video in results:
+ yield self._extract_video(video, team, False)
+
+ def _extract_url_results(self, team, content_id):
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, team, content_id),
+ self._PAGE_SIZE)
+ return self.playlist_result(entries, playlist_title=content_id)
diff --git a/hypervideo_dl/extractor/nbc.py b/hypervideo_dl/extractor/nbc.py
new file mode 100644
index 0000000..0d77648
--- /dev/null
+++ b/hypervideo_dl/extractor/nbc.py
@@ -0,0 +1,525 @@
+from __future__ import unicode_literals
+
+import base64
+import json
+import re
+
+from .common import InfoExtractor
+from .theplatform import ThePlatformIE
+from .adobepass import AdobePassIE
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ smuggle_url,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+)
+
+
+class NBCIE(AdobePassIE):
+ _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
+ 'info_dict': {
+ 'id': '2848237',
+ 'ext': 'mp4',
+ 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
+ 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
+ 'timestamp': 1424246400,
+ 'upload_date': '20150218',
+ 'uploader': 'NBCU-COM',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
+ 'info_dict': {
+ 'id': '2832821',
+ 'ext': 'mp4',
+ 'title': 'Star Wars Teaser',
+ 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
+ 'timestamp': 1417852800,
+ 'upload_date': '20141206',
+ 'uploader': 'NBCU-COM',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Only works from US',
+ },
+ {
+ # HLS streams requires the 'hdnea3' cookie
+ 'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
+ 'info_dict': {
+ 'id': '101528f5a9e8127b107e98c5e6ce4638',
+ 'ext': 'mp4',
+ 'title': 'Goliath',
+ 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
+ 'timestamp': 1237100400,
+ 'upload_date': '20090315',
+ 'uploader': 'NBCU-COM',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Only works from US',
+ },
+ {
+ 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
+ 'only_matching': True,
+ },
+ {
+ # Percent escaped url
+ 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ permalink, video_id = re.match(self._VALID_URL, url).groups()
+ permalink = 'http' + compat_urllib_parse_unquote(permalink)
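+ # page metadata comes from NBC's "friendship" GraphQL endpoint; the
+ # bonanzaPage query below appears to mirror the one the web app issues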
+ video_data = self._download_json(
+ 'https://friendship.nbc.co/v2/graphql', video_id, query={
+ 'query': '''query bonanzaPage(
+ $app: NBCUBrands! = nbc
+ $name: String!
+ $oneApp: Boolean
+ $platform: SupportedPlatforms! = web
+ $type: EntityPageType! = VIDEO
+ $userId: String!
+) {
+ bonanzaPage(
+ app: $app
+ name: $name
+ oneApp: $oneApp
+ platform: $platform
+ type: $type
+ userId: $userId
+ ) {
+ metadata {
+ ... on VideoPageData {
+ description
+ episodeNumber
+ keywords
+ locked
+ mpxAccountId
+ mpxGuid
+ rating
+ resourceId
+ seasonNumber
+ secondaryTitle
+ seriesShortTitle
+ }
+ }
+ }
+}''',
+ 'variables': json.dumps({
+ 'name': permalink,
+ 'oneApp': True,
+ 'userId': '0',
+ }),
+ })['data']['bonanzaPage']['metadata']
+ query = {
+ 'mbr': 'true',
+ 'manifest': 'm3u',
+ }
+ video_id = video_data['mpxGuid']
+ title = video_data['secondaryTitle']
+ if video_data.get('locked'):
+ resource = self._get_mvpd_resource(
+ video_data.get('resourceId') or 'nbcentertainment',
+ title, video_id, video_data.get('rating'))
+ query['auth'] = self._extract_mvpd_auth(
+ url, video_id, 'nbcentertainment', resource)
+ theplatform_url = smuggle_url(update_url_query(
+ 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id),
+ query), {'force_smil_url': True})
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': title,
+ 'url': theplatform_url,
+ 'description': video_data.get('description'),
+ 'tags': video_data.get('keywords'),
+ 'season_number': int_or_none(video_data.get('seasonNumber')),
+ 'episode_number': int_or_none(video_data.get('episodeNumber')),
+ 'episode': title,
+ 'series': video_data.get('seriesShortTitle'),
+ 'ie_key': 'ThePlatform',
+ }
+
+
+class NBCSportsVPlayerIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
+ _VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
+
+ _TESTS = [{
+ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'mp4',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ 'timestamp': 1426270238,
+ 'upload_date': '20150313',
+ 'uploader': 'NBCU-SPORTS',
+ }
+ }, {
+ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ iframe_m = re.search(
+ r'<(?:iframe[^>]+|div[^>]+data-(?:mpx-)?)src="(?P<url>%s[^"]+)"' % NBCSportsVPlayerIE._VALID_URL_BASE, webpage)
+ if iframe_m:
+ return iframe_m.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ theplatform_url = self._og_search_video_url(webpage).replace(
+ 'vplayer.nbcsports.com', 'player.theplatform.com')
+ return self.url_result(theplatform_url, 'ThePlatform')
+
+
+class NBCSportsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+
+ _TESTS = [{
+ # iframe src
+ 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
+ 'info_dict': {
+ 'id': 'PHJSaFWbrTY9',
+ 'ext': 'mp4',
+ 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
+ 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
+ 'uploader': 'NBCU-SPORTS',
+ 'upload_date': '20150330',
+ 'timestamp': 1427726529,
+ }
+ }, {
+ # data-mpx-src
+ 'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot',
+ 'only_matching': True,
+ }, {
+ # data-src
+ 'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ return self.url_result(
+ NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
+
+
+class NBCSportsStreamIE(AdobePassIE):
+ _VALID_URL = r'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
+ 'info_dict': {
+ 'id': '206559',
+ 'ext': 'mp4',
+ 'title': 'Amgen Tour of California Women\'s Recap',
+ 'description': 'md5:66520066b3b5281ada7698d0ea2aa894',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Requires Adobe Pass Authentication',
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ live_source = self._download_json(
+ 'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id,
+ video_id)
+ video_source = live_source['videoSources'][0]
+ title = video_source['title']
+ source_url = None
+ for k in ('source', 'msl4source', 'iossource', 'hlsv4'):
+ sk = k + 'Url'
+ source_url = video_source.get(sk) or video_source.get(sk + 'Alt')
+ if source_url:
+ break
+ else:
+ source_url = video_source['ottStreamUrl']
+ is_live = video_source.get('type') == 'live' or video_source.get('status') == 'Live'
+ resource = self._get_mvpd_resource('nbcsports', title, video_id, '')
+ token = self._extract_mvpd_auth(url, video_id, 'nbcsports', resource)
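+ # the token service wraps the stream URL with CDN auth; the Adobe Pass
+ # token and the resource are both sent base64-encoded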
+ tokenized_url = self._download_json(
+ 'https://token.playmakerservices.com/cdn',
+ video_id, data=json.dumps({
+ 'requestorId': 'nbcsports',
+ 'pid': video_id,
+ 'application': 'NBCSports',
+ 'version': 'v1',
+ 'platform': 'desktop',
+ 'cdn': 'akamai',
+ 'url': video_source['sourceUrl'],
+ 'token': base64.b64encode(token.encode()).decode(),
+ 'resourceId': base64.b64encode(resource.encode()).decode(),
+ }).encode())['tokenizedUrl']
+ formats = self._extract_m3u8_formats(tokenized_url, video_id, 'mp4')
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': live_source.get('description'),
+ 'formats': formats,
+ 'is_live': is_live,
+ }
+
+
+class NBCNewsIE(ThePlatformIE):
+ _VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
+ 'md5': 'cf4bc9e6ce0130f00f545d80ecedd4bf',
+ 'info_dict': {
+ 'id': '269389891880',
+ 'ext': 'mp4',
+ 'title': 'How Twitter Reacted To The Snowden Interview',
+ 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
+ 'timestamp': 1401363060,
+ 'upload_date': '20140529',
+ },
+ },
+ {
+ 'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
+ 'md5': 'fdbf39ab73a72df5896b6234ff98518a',
+ 'info_dict': {
+ 'id': '529953347624',
+ 'ext': 'mp4',
+ 'title': 'FULL EPISODE: Family Business',
+ 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
+ },
+ 'skip': 'This page is unavailable.',
+ },
+ {
+ 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
+ 'md5': '8eb831eca25bfa7d25ddd83e85946548',
+ 'info_dict': {
+ 'id': '394064451844',
+ 'ext': 'mp4',
+ 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
+ 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
+ 'timestamp': 1423104900,
+ 'upload_date': '20150205',
+ },
+ },
+ {
+ 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
+ 'md5': '4a8c4cec9e1ded51060bdda36ff0a5c0',
+ 'info_dict': {
+ 'id': 'n431456',
+ 'ext': 'mp4',
+ 'title': "Volkswagen U.S. Chief: We 'Totally Screwed Up'",
+ 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
+ 'upload_date': '20150922',
+ 'timestamp': 1442917800,
+ },
+ },
+ {
+ 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
+ 'md5': '118d7ca3f0bea6534f119c68ef539f71',
+ 'info_dict': {
+ 'id': '669831235788',
+ 'ext': 'mp4',
+ 'title': 'See the aurora borealis from space in stunning new NASA video',
+ 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
+ 'upload_date': '20160420',
+ 'timestamp': 1461152093,
+ },
+ },
+ {
+ 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
+ 'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
+ 'info_dict': {
+ 'id': '314487875924',
+ 'ext': 'mp4',
+ 'title': 'The chaotic GOP immigration vote',
+ 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1406937606,
+ 'upload_date': '20140802',
+ },
+ },
+ {
+ 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
+ 'only_matching': True,
+ },
+ {
+ # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
+ 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._parse_json(self._search_regex(
+ r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
+ webpage, 'bootstrap json'), video_id)['props']['initialState']
+ video_data = try_get(data, lambda x: x['video']['current'], dict)
+ if not video_data:
+ video_data = data['article']['content'][0]['primaryMedia']['video']
+ title = video_data['headline']['primary']
+
+ formats = []
+ for va in video_data.get('videoAssets', []):
+ public_url = va.get('publicUrl')
+ if not public_url:
+ continue
+ if '://link.theplatform.com/' in public_url:
+ public_url = update_url_query(public_url, {'format': 'redirect'})
+ format_id = va.get('format')
+ if format_id == 'M3U':
+ formats.extend(self._extract_m3u8_formats(
+ public_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False))
+ continue
+ tbr = int_or_none(va.get('bitrate'), 1000)
+ if tbr:
+ format_id += '-%d' % tbr
+ formats.append({
+ 'format_id': format_id,
+ 'url': public_url,
+ 'width': int_or_none(va.get('width')),
+ 'height': int_or_none(va.get('height')),
+ 'tbr': tbr,
+ 'ext': 'mp4',
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ closed_captioning = video_data.get('closedCaptioning')
+ if closed_captioning:
+ for cc_url in closed_captioning.values():
+ if not cc_url:
+ continue
+ subtitles.setdefault('en', []).append({
+ 'url': cc_url,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': try_get(video_data, lambda x: x['description']['primary']),
+ 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'timestamp': unified_timestamp(video_data.get('datePublished')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class NBCOlympicsIE(InfoExtractor):
+ IE_NAME = 'nbcolympics'
+ _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
+
+ _TEST = {
+ # Geo-restricted to US
+ 'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
+ 'md5': '54fecf846d05429fbaa18af557ee523a',
+ 'info_dict': {
+ 'id': 'WjTBzDXx5AUq',
+ 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
+ 'ext': 'mp4',
+ 'title': 'Rose\'s son Leo was in tears after his dad won gold',
+ 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
+ 'timestamp': 1471274964,
+ 'upload_date': '20160815',
+ 'uploader': 'NBCU-SPORTS',
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ drupal_settings = self._parse_json(self._search_regex(
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+ webpage, 'drupal settings'), display_id)
+
+ iframe_url = drupal_settings['vod']['iframe_url']
+ theplatform_url = iframe_url.replace(
+ 'vplayer.nbcolympics.com', 'player.theplatform.com')
+
+ return {
+ '_type': 'url_transparent',
+ 'url': theplatform_url,
+ 'ie_key': ThePlatformIE.ie_key(),
+ 'display_id': display_id,
+ }
+
+
+class NBCOlympicsStreamIE(AdobePassIE):
+ IE_NAME = 'nbcolympics:stream'
+ _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
+ _TEST = {
+ 'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8',
+ 'info_dict': {
+ 'id': '203493',
+ 'ext': 'mp4',
+ 'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+ _DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid')
+ resource = self._search_regex(
+ r"resource\s*=\s*'(.+)';", webpage,
+ 'resource').replace("' + pid + '", pid)
+ event_config = self._download_json(
+ self._DATA_URL_TEMPLATE % ('event_config', pid),
+ pid)['eventConfig']
+ title = self._live_title(event_config['eventTitle'])
+ source_url = self._download_json(
+ self._DATA_URL_TEMPLATE % ('live_sources', pid),
+ pid)['videoSources'][0]['sourceUrl']
+ media_token = self._extract_mvpd_auth(
+ url, pid, event_config.get('requestorId', 'NBCOlympics'), resource)
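+ # the Adobe "sign" endpoint appears to return the tokenized m3u8 URL as
+ # plain text, which is passed straight to the HLS extractor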
+ formats = self._extract_m3u8_formats(self._download_webpage(
+ 'http://sp.auth.adobe.com/tvs/v1/sign', pid, query={
+ 'cdn': 'akamai',
+ 'mediaToken': base64.b64encode(media_token.encode()),
+ 'resource': base64.b64encode(resource.encode()),
+ 'url': source_url,
+ }), pid, 'mp4')
+ self._sort_formats(formats)
+
+ return {
+ 'id': pid,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'is_live': True,
+ }
diff --git a/hypervideo_dl/extractor/ndr.py b/hypervideo_dl/extractor/ndr.py
new file mode 100644
index 0000000..ddd828d
--- /dev/null
+++ b/hypervideo_dl/extractor/ndr.py
@@ -0,0 +1,440 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ merge_dicts,
+ parse_iso8601,
+ qualities,
+ try_get,
+ urljoin,
+)
+
+
+class NDRBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = next(group for group in mobj.groups() if group)
+ webpage = self._download_webpage(url, display_id)
+ return self._extract_embed(webpage, display_id)
+
+
+class NDRIE(NDRBaseIE):
+ IE_NAME = 'ndr'
+ IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
+ _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
+ _TESTS = [{
+ # httpVideo, same content id
+ 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
+ 'md5': '6515bc255dc5c5f8c85bbc38e035a659',
+ 'info_dict': {
+ 'id': 'hafengeburtstag988',
+ 'display_id': 'Party-Poette-und-Parade',
+ 'ext': 'mp4',
+ 'title': 'Party, Pötte und Parade',
+ 'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
+ 'uploader': 'ndrtv',
+ 'timestamp': 1431108900,
+ 'upload_date': '20150510',
+ 'duration': 3498,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # httpVideo, different content id
+ 'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
+ 'md5': '1043ff203eab307f0c51702ec49e9a71',
+ 'info_dict': {
+ 'id': 'osna272',
+ 'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
+ 'ext': 'mp4',
+ 'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
+ 'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
+ 'uploader': 'ndrtv',
+ 'timestamp': 1442059200,
+ 'upload_date': '20150912',
+ 'duration': 510,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # httpAudio, same content id
+ 'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
+ 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
+ 'info_dict': {
+ 'id': 'audio51535',
+ 'display_id': 'La-Valette-entgeht-der-Hinrichtung',
+ 'ext': 'mp3',
+ 'title': 'La Valette entgeht der Hinrichtung',
+ 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
+ 'uploader': 'ndrinfo',
+ 'timestamp': 1290626100,
+ 'upload_date': '20140729',
+ 'duration': 884,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # with subtitles
+ 'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
+ 'info_dict': {
+ 'id': 'extra18674',
+ 'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
+ 'ext': 'mp4',
+ 'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
+ 'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
+ 'uploader': 'ndrtv',
+ 'upload_date': '20201113',
+ 'duration': 1749,
+ 'subtitles': {
+ 'de': [{
+ 'ext': 'ttml',
+ 'url': r're:^https://www\.ndr\.de.+',
+ }],
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
+ 'only_matching': True,
+ }]
+
+ def _extract_embed(self, webpage, display_id):
+ embed_url = self._html_search_meta(
+ 'embedURL', webpage, 'embed URL',
+ default=None) or self._search_regex(
+ r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'embed URL', group='url')
+ description = self._search_regex(
+ r'<p[^>]+itemprop="description">([^<]+)</p>',
+ webpage, 'description', default=None) or self._og_search_description(webpage)
+ timestamp = parse_iso8601(
+ self._search_regex(
+ r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
+ webpage, 'upload date', default=None))
+ info = self._search_json_ld(webpage, display_id, default={})
+ return merge_dicts({
+ '_type': 'url_transparent',
+ 'url': embed_url,
+ 'display_id': display_id,
+ 'description': description,
+ 'timestamp': timestamp,
+ }, info)
+
+
+class NJoyIE(NDRBaseIE):
+ IE_NAME = 'njoy'
+ IE_DESC = 'N-JOY'
+ _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html'
+ _TESTS = [{
+ # httpVideo, same content id
+ 'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
+ 'md5': 'cb63be60cd6f9dd75218803146d8dc67',
+ 'info_dict': {
+ 'id': 'comedycontest2480',
+ 'display_id': 'Benaissa-beim-NDR-Comedy-Contest',
+ 'ext': 'mp4',
+ 'title': 'Benaissa beim NDR Comedy Contest',
+ 'description': 'md5:f057a6c4e1c728b10d33b5ffd36ddc39',
+ 'uploader': 'ndrtv',
+ 'upload_date': '20141129',
+ 'duration': 654,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # httpVideo, different content id
+ 'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
+ 'md5': '417660fffa90e6df2fda19f1b40a64d8',
+ 'info_dict': {
+ 'id': 'dockville882',
+ 'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
+ 'ext': 'mp4',
+ 'title': '"Ich hab noch nie" mit Felix Jaehn',
+ 'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
+ 'uploader': 'njoy',
+ 'upload_date': '20150822',
+ 'duration': 211,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.n-joy.de/radio/webradio/morningshow209.html',
+ 'only_matching': True,
+ }]
+
+ def _extract_embed(self, webpage, display_id):
+ video_id = self._search_regex(
+ r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
+ description = self._search_regex(
+ r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
+ webpage, 'description', fatal=False)
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'NDREmbedBase',
+ 'url': 'ndr:%s' % video_id,
+ 'display_id': display_id,
+ 'description': description,
+ }
+
+
+class NDREmbedBaseIE(InfoExtractor):
+ IE_NAME = 'ndr:embed:base'
+ _VALID_URL = r'(?:ndr:(?P<id_s>[\da-z]+)|https?://www\.ndr\.de/(?P<id>[\da-z]+)-ppjson\.json)'
+ _TESTS = [{
+ 'url': 'ndr:soundcheck3366',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/soundcheck3366-ppjson.json',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('id_s')
+
+ ppjson = self._download_json(
+ 'http://www.ndr.de/%s-ppjson.json' % video_id, video_id)
+
+ playlist = ppjson['playlist']
+
+ formats = []
+ quality_key = qualities(('xs', 's', 'm', 'l', 'xl'))
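+        # qualities() ranks later entries higher, e.g. quality_key('xl') >
+        # quality_key('m'); unknown or missing quality ids rank lowest (-1)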
+
+ for format_id, f in playlist.items():
+ src = f.get('src')
+ if not src:
+ continue
+ ext = determine_ext(src, None)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
+ f4m_id='hds', fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', m3u8_id='hls',
+ entry_protocol='m3u8_native', fatal=False))
+ else:
+ quality = f.get('quality')
+ ff = {
+ 'url': src,
+ 'format_id': quality or format_id,
+ 'quality': quality_key(quality),
+ }
+ type_ = f.get('type')
+ if type_ and type_.split('/')[0] == 'audio':
+ ff['vcodec'] = 'none'
+ ff['ext'] = ext or 'mp3'
+ formats.append(ff)
+ self._sort_formats(formats)
+
+ config = playlist['config']
+
+        live = config.get('streamType') in ['httpVideoLive', 'httpAudioLive']
+ title = config['title']
+ if live:
+ title = self._live_title(title)
+ uploader = ppjson.get('config', {}).get('branding')
+ upload_date = ppjson.get('config', {}).get('publicationDate')
+ duration = int_or_none(config.get('duration'))
+
+ thumbnails = []
+ poster = try_get(config, lambda x: x['poster'], dict) or {}
+ for thumbnail_id, thumbnail in poster.items():
+ thumbnail_url = urljoin(url, thumbnail.get('src'))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'id': thumbnail.get('quality') or thumbnail_id,
+ 'url': thumbnail_url,
+ 'preference': quality_key(thumbnail.get('quality')),
+ })
+
+ subtitles = {}
+ tracks = config.get('tracks')
+ if tracks and isinstance(tracks, list):
+ for track in tracks:
+ if not isinstance(track, dict):
+ continue
+ track_url = urljoin(url, track.get('src'))
+ if not track_url:
+ continue
+ subtitles.setdefault(track.get('srclang') or 'de', []).append({
+ 'url': track_url,
+ 'ext': 'ttml',
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'is_live': live,
+ 'uploader': uploader if uploader != '-' else None,
+ 'upload_date': upload_date[0:8] if upload_date else None,
+ 'duration': duration,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class NDREmbedIE(NDREmbedBaseIE):
+ IE_NAME = 'ndr:embed'
+ _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
+ _TESTS = [{
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
+ 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
+ 'info_dict': {
+ 'id': 'ndraktuell28488',
+ 'ext': 'mp4',
+ 'title': 'Norddeutschland begrüßt Flüchtlinge',
+ 'is_live': False,
+ 'uploader': 'ndrtv',
+ 'upload_date': '20150907',
+ 'duration': 132,
+ },
+ }, {
+ 'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
+ 'md5': '002085c44bae38802d94ae5802a36e78',
+ 'info_dict': {
+ 'id': 'soundcheck3366',
+ 'ext': 'mp4',
+ 'title': 'Ella Henderson braucht Vergleiche nicht zu scheuen',
+ 'is_live': False,
+ 'uploader': 'ndr2',
+ 'upload_date': '20150912',
+ 'duration': 3554,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.ndr.de/info/audio51535-player.html',
+ 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
+ 'info_dict': {
+ 'id': 'audio51535',
+ 'ext': 'mp3',
+ 'title': 'La Valette entgeht der Hinrichtung',
+ 'is_live': False,
+ 'uploader': 'ndrinfo',
+ 'upload_date': '20140729',
+ 'duration': 884,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/visite/visite11010-externalPlayer.html',
+ 'md5': 'ae57f80511c1e1f2fd0d0d3d31aeae7c',
+ 'info_dict': {
+ 'id': 'visite11010',
+ 'ext': 'mp4',
+ 'title': 'Visite - die ganze Sendung',
+ 'is_live': False,
+ 'uploader': 'ndrtv',
+ 'upload_date': '20150902',
+ 'duration': 3525,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # httpVideoLive
+ 'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
+ 'info_dict': {
+ 'id': 'livestream217',
+ 'ext': 'flv',
+ 'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'is_live': True,
+ 'upload_date': '20150910',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.ndr.de/ndrkultur/audio255020-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/nordtour/nordtour7124-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/kultur/film/videos/videoimport10424-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/hamburg_journal/hamj43006-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/weltbilder/weltbilder4518-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/doku952-player.html',
+ 'only_matching': True,
+ }]
+
+
+class NJoyEmbedIE(NDREmbedBaseIE):
+ IE_NAME = 'njoy:embed'
+ _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
+ _TESTS = [{
+ # httpVideo
+ 'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html',
+ 'md5': '8483cbfe2320bd4d28a349d62d88bd74',
+ 'info_dict': {
+ 'id': 'doku948',
+ 'ext': 'mp4',
+ 'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
+ 'is_live': False,
+ 'upload_date': '20150807',
+ 'duration': 1011,
+ },
+ }, {
+ # httpAudio
+ 'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
+ 'md5': 'd989f80f28ac954430f7b8a48197188a',
+ 'info_dict': {
+ 'id': 'stefanrichter100',
+ 'ext': 'mp3',
+ 'title': 'Interview mit einem Augenzeugen',
+ 'is_live': False,
+ 'uploader': 'njoy',
+ 'upload_date': '20150909',
+ 'duration': 140,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # httpAudioLive, no explicit ext
+ 'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
+ 'info_dict': {
+ 'id': 'webradioweltweit100',
+ 'ext': 'mp3',
+ 'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'is_live': True,
+ 'uploader': 'njoy',
+ 'upload_date': '20150810',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.n-joy.de/musik/dockville882-player_image-3905259e-0803-4764-ac72-8b7de077d80a_theme-n-joy.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.n-joy.de/radio/sendungen/morningshow/urlaubsfotos190-player_image-066a5df1-5c95-49ec-a323-941d848718db_theme-n-joy.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.n-joy.de/entertainment/comedy/krudetv290-player_image-ab261bfe-51bf-4bf3-87ba-c5122ee35b3d_theme-n-joy.html',
+ 'only_matching': True,
+ }]
diff --git a/hypervideo_dl/extractor/ndtv.py b/hypervideo_dl/extractor/ndtv.py
new file mode 100644
index 0000000..bc3eb91
--- /dev/null
+++ b/hypervideo_dl/extractor/ndtv.py
@@ -0,0 +1,115 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_unquote_plus
+)
+from ..utils import (
+ parse_duration,
+ remove_end,
+ unified_strdate,
+ urljoin
+)
+
+
+class NDTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?ndtv\.com/(?:[^/]+/)*videos?/?(?:[^/]+/)*[^/?^&]+-(?P<id>\d+)'
+
+ _TESTS = [
+ {
+ 'url': 'https://khabar.ndtv.com/video/show/prime-time/prime-time-ill-system-and-poor-education-468818',
+ 'md5': '78efcf3880ef3fd9b83d405ca94a38eb',
+ 'info_dict': {
+ 'id': '468818',
+ 'ext': 'mp4',
+ 'title': "प्राइम टाइम: सिस्टम बीमार, स्कूल बदहाल",
+ 'description': 'md5:f410512f1b49672e5695dea16ef2731d',
+ 'upload_date': '20170928',
+ 'duration': 2218,
+ 'thumbnail': r're:https?://.*\.jpg',
+ }
+ },
+ {
+ # __filename is url
+ 'url': 'http://movies.ndtv.com/videos/cracker-free-diwali-wishes-from-karan-johar-kriti-sanon-other-stars-470304',
+ 'md5': 'f1d709352305b44443515ac56b45aa46',
+ 'info_dict': {
+ 'id': '470304',
+ 'ext': 'mp4',
+ 'title': "Cracker-Free Diwali Wishes From Karan Johar, Kriti Sanon & Other Stars",
+ 'description': 'md5:f115bba1adf2f6433fa7c1ade5feb465',
+ 'upload_date': '20171019',
+ 'duration': 137,
+ 'thumbnail': r're:https?://.*\.jpg',
+ }
+ },
+ {
+ 'url': 'https://www.ndtv.com/video/news/news/delhi-s-air-quality-status-report-after-diwali-is-very-poor-470372',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://auto.ndtv.com/videos/the-cnb-daily-october-13-2017-469935',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://sports.ndtv.com/cricket/videos/2nd-t20i-rock-thrown-at-australia-cricket-team-bus-after-win-over-india-469764',
+ 'only_matching': True
+ },
+ {
+ 'url': 'http://gadgets.ndtv.com/videos/uncharted-the-lost-legacy-review-465568',
+ 'only_matching': True
+ },
+ {
+ 'url': 'http://profit.ndtv.com/videos/news/video-indian-economy-on-very-solid-track-international-monetary-fund-chief-470040',
+ 'only_matching': True
+ },
+ {
+ 'url': 'http://food.ndtv.com/video-basil-seeds-coconut-porridge-419083',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://doctor.ndtv.com/videos/top-health-stories-of-the-week-467396',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://swirlster.ndtv.com/video/how-to-make-friends-at-work-469324',
+ 'only_matching': True
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+        # '__title' does not contain extra words such as the sub-site name, "Video", etc.
+ title = compat_urllib_parse_unquote_plus(
+ self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None)
+ or self._og_search_title(webpage))
+
+ filename = self._search_regex(
+ r"(?:__)?filename\s*[:=]\s*'([^']+)'", webpage, 'video filename')
+ # in "movies" sub-site pages, filename is URL
+ video_url = urljoin('https://ndtvod.bc-ssl.cdn.bitgravity.com/23372/ndtv/', filename.lstrip('/'))
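+        # urljoin() leaves 'filename' untouched when it is already an absolute
+        # URL (the "movies" case above) and prefixes the CDN base otherwise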
+
+ # "doctor" sub-site has MM:SS format
+ duration = parse_duration(self._search_regex(
+ r"(?:__)?duration\s*[:=]\s*'([^']+)'", webpage, 'duration', fatal=False))
+
+ # "sports", "doctor", "swirlster" sub-sites don't have 'publish-date'
+ upload_date = unified_strdate(self._html_search_meta(
+ 'publish-date', webpage, 'upload date', default=None) or self._html_search_meta(
+ 'uploadDate', webpage, 'upload date', default=None) or self._search_regex(
+ r'datePublished"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False))
+
+ description = remove_end(self._og_search_description(webpage), ' (Read more)')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': duration,
+ 'upload_date': upload_date,
+ }
diff --git a/hypervideo_dl/extractor/nerdcubed.py b/hypervideo_dl/extractor/nerdcubed.py
new file mode 100644
index 0000000..9feccc6
--- /dev/null
+++ b/hypervideo_dl/extractor/nerdcubed.py
@@ -0,0 +1,36 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+
+from .common import InfoExtractor
+
+
+class NerdCubedFeedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json'
+ _TEST = {
+ 'url': 'http://www.nerdcubed.co.uk/feed.json',
+ 'info_dict': {
+ 'id': 'nerdcubed-feed',
+ 'title': 'nerdcubed.co.uk feed',
+ },
+ 'playlist_mincount': 1300,
+ }
+
+ def _real_extract(self, url):
+ feed = self._download_json(url, url, 'Downloading NerdCubed JSON feed')
+
+ entries = [{
+ '_type': 'url',
+ 'title': feed_entry['title'],
+ 'uploader': feed_entry['source']['name'] if feed_entry['source'] else None,
+ 'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'),
+ 'url': 'http://www.youtube.com/watch?v=' + feed_entry['youtube_id'],
+ } for feed_entry in feed]
+
+ return {
+ '_type': 'playlist',
+ 'title': 'nerdcubed.co.uk feed',
+ 'id': 'nerdcubed-feed',
+ 'entries': entries,
+ }
diff --git a/hypervideo_dl/extractor/neteasemusic.py b/hypervideo_dl/extractor/neteasemusic.py
new file mode 100644
index 0000000..978a058
--- /dev/null
+++ b/hypervideo_dl/extractor/neteasemusic.py
@@ -0,0 +1,485 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from hashlib import md5
+from base64 import b64encode
+from datetime import datetime
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlencode,
+ compat_str,
+ compat_itertools_count,
+)
+from ..utils import (
+ sanitized_Request,
+ float_or_none,
+)
+
+
+class NetEaseMusicBaseIE(InfoExtractor):
+ _FORMATS = ['bMusic', 'mMusic', 'hMusic']
+ _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
+ _API_BASE = 'http://music.163.com/api/'
+
+ @classmethod
+ def _encrypt(cls, dfsid):
+ salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
+ string_bytes = bytearray(compat_str(dfsid).encode('ascii'))
+ salt_len = len(salt_bytes)
+ for i in range(len(string_bytes)):
+ string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
+ m = md5()
+ m.update(bytes(string_bytes))
+ result = b64encode(m.digest()).decode('ascii')
+ return result.replace('/', '_').replace('+', '-')
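+        # a hypothetical illustration: _encrypt(some numeric dfsId) yields a
+        # 24-character URL-safe token that extract_formats() below embeds into
+        # the CDN song path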
+
+ def extract_formats(self, info):
+ formats = []
+ for song_format in self._FORMATS:
+ details = info.get(song_format)
+ if not details:
+ continue
+ song_file_path = '/%s/%s.%s' % (
+ self._encrypt(details['dfsId']), details['dfsId'], details['extension'])
+
+            # 203.130.59.9, 124.40.233.182, 115.231.74.139, etc. are reverse
+            # proxy-like endpoints from NetEase's CDN provider that can be used
+            # if m5.music.126.net does not work, especially for users outside of
+            # Mainland China
+            # via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880
+ for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net',
+ 'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'):
+ song_url = host + song_file_path
+ if self._is_valid_url(song_url, info['id'], 'song'):
+ formats.append({
+ 'url': song_url,
+ 'ext': details.get('extension'),
+ 'abr': float_or_none(details.get('bitrate'), scale=1000),
+ 'format_id': song_format,
+ 'filesize': details.get('size'),
+ 'asr': details.get('sr')
+ })
+ break
+ return formats
+
+    @classmethod
+    def convert_milliseconds(cls, ms):
+        # tolerate a missing value (e.g. an album without a publishTime)
+        return int(round(ms / 1000.0)) if ms is not None else None
+
+ def query_api(self, endpoint, video_id, note):
+ req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
+ req.add_header('Referer', self._API_BASE)
+ return self._download_json(req, video_id, note)
+
+
+class NetEaseMusicIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:song'
+ IE_DESC = '网易云音乐'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://music.163.com/#/song?id=32102397',
+ 'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
+ 'info_dict': {
+ 'id': '32102397',
+ 'ext': 'mp3',
+ 'title': 'Bad Blood (feat. Kendrick Lamar)',
+ 'creator': 'Taylor Swift / Kendrick Lamar',
+ 'upload_date': '20150517',
+ 'timestamp': 1431878400,
+ 'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
+ },
+ 'skip': 'Blocked outside Mainland China',
+ }, {
+ 'note': 'No lyrics translation.',
+ 'url': 'http://music.163.com/#/song?id=29822014',
+ 'info_dict': {
+ 'id': '29822014',
+ 'ext': 'mp3',
+ 'title': '听见下雨的声音',
+ 'creator': '周杰伦',
+ 'upload_date': '20141225',
+ 'timestamp': 1419523200,
+ 'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
+ },
+ 'skip': 'Blocked outside Mainland China',
+ }, {
+ 'note': 'No lyrics.',
+ 'url': 'http://music.163.com/song?id=17241424',
+ 'info_dict': {
+ 'id': '17241424',
+ 'ext': 'mp3',
+ 'title': 'Opus 28',
+ 'creator': 'Dustin O\'Halloran',
+ 'upload_date': '20080211',
+ 'timestamp': 1202745600,
+ },
+ 'skip': 'Blocked outside Mainland China',
+ }, {
+ 'note': 'Has translated name.',
+ 'url': 'http://music.163.com/#/song?id=22735043',
+ 'info_dict': {
+ 'id': '22735043',
+ 'ext': 'mp3',
+ 'title': '소원을 말해봐 (Genie)',
+ 'creator': '少女时代',
+ 'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
+ 'upload_date': '20100127',
+ 'timestamp': 1264608000,
+ 'alt_title': '说出愿望吧(Genie)',
+ },
+ 'skip': 'Blocked outside Mainland China',
+ }]
+
+ def _process_lyrics(self, lyrics_info):
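+        # merges per-timestamp translations into the original lines, e.g.
+        # '[00:12.34]Hello' + '[00:12.34]你好' -> '[00:12.34]Hello / 你好'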
+ original = lyrics_info.get('lrc', {}).get('lyric')
+ translated = lyrics_info.get('tlyric', {}).get('lyric')
+
+ if not translated:
+ return original
+
+ lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
+ original_ts_texts = re.findall(lyrics_expr, original)
+ translation_ts_dict = dict(
+ (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
+ )
+ lyrics = '\n'.join([
+ '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
+ for time_stamp, text in original_ts_texts
+ ])
+ return lyrics
+
+ def _real_extract(self, url):
+ song_id = self._match_id(url)
+
+ params = {
+ 'id': song_id,
+ 'ids': '[%s]' % song_id
+ }
+ info = self.query_api(
+ 'song/detail?' + compat_urllib_parse_urlencode(params),
+ song_id, 'Downloading song info')['songs'][0]
+
+ formats = self.extract_formats(info)
+ self._sort_formats(formats)
+
+ lyrics_info = self.query_api(
+ 'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
+ song_id, 'Downloading lyrics data')
+ lyrics = self._process_lyrics(lyrics_info)
+
+ alt_title = None
+ if info.get('transNames'):
+ alt_title = '/'.join(info.get('transNames'))
+
+ return {
+ 'id': song_id,
+ 'title': info['name'],
+ 'alt_title': alt_title,
+ 'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]),
+ 'timestamp': self.convert_milliseconds(info.get('album', {}).get('publishTime')),
+ 'thumbnail': info.get('album', {}).get('picUrl'),
+ 'duration': self.convert_milliseconds(info.get('duration', 0)),
+ 'description': lyrics,
+ 'formats': formats,
+ }
+
+
+class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:album'
+ IE_DESC = '网易云音乐 - 专辑'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://music.163.com/#/album?id=220780',
+ 'info_dict': {
+ 'id': '220780',
+ 'title': 'B\'day',
+ },
+ 'playlist_count': 23,
+ 'skip': 'Blocked outside Mainland China',
+ }
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+
+ info = self.query_api(
+ 'album/%s?id=%s' % (album_id, album_id),
+ album_id, 'Downloading album data')['album']
+
+ name = info['name']
+ desc = info.get('description')
+ entries = [
+ self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
+ 'NetEaseMusic', song['id'])
+ for song in info['songs']
+ ]
+ return self.playlist_result(entries, album_id, name, desc)
+
+
+class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:singer'
+ IE_DESC = '网易云音乐 - 歌手'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'note': 'Singer has aliases.',
+ 'url': 'http://music.163.com/#/artist?id=10559',
+ 'info_dict': {
+ 'id': '10559',
+ 'title': '张惠妹 - aMEI;阿密特',
+ },
+ 'playlist_count': 50,
+ 'skip': 'Blocked outside Mainland China',
+ }, {
+ 'note': 'Singer has translated name.',
+ 'url': 'http://music.163.com/#/artist?id=124098',
+ 'info_dict': {
+ 'id': '124098',
+ 'title': '李昇基 - 이승기',
+ },
+ 'playlist_count': 50,
+ 'skip': 'Blocked outside Mainland China',
+ }]
+
+ def _real_extract(self, url):
+ singer_id = self._match_id(url)
+
+ info = self.query_api(
+ 'artist/%s?id=%s' % (singer_id, singer_id),
+ singer_id, 'Downloading singer data')
+
+ name = info['artist']['name']
+ if info['artist']['trans']:
+ name = '%s - %s' % (name, info['artist']['trans'])
+ if info['artist']['alias']:
+ name = '%s - %s' % (name, ';'.join(info['artist']['alias']))
+
+ entries = [
+ self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
+ 'NetEaseMusic', song['id'])
+ for song in info['hotSongs']
+ ]
+ return self.playlist_result(entries, singer_id, name)
+
+
+class NetEaseMusicListIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:playlist'
+ IE_DESC = '网易云音乐 - 歌单'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://music.163.com/#/playlist?id=79177352',
+ 'info_dict': {
+ 'id': '79177352',
+ 'title': 'Billboard 2007 Top 100',
+ 'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
+ },
+ 'playlist_count': 99,
+ 'skip': 'Blocked outside Mainland China',
+ }, {
+ 'note': 'Toplist/Charts sample',
+ 'url': 'http://music.163.com/#/discover/toplist?id=3733003',
+ 'info_dict': {
+ 'id': '3733003',
+ 'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
+ 'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
+ },
+ 'playlist_count': 50,
+ 'skip': 'Blocked outside Mainland China',
+ }]
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ info = self.query_api(
+ 'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
+ list_id, 'Downloading playlist data')['result']
+
+ name = info['name']
+ desc = info.get('description')
+
+ if info.get('specialType') == 10: # is a chart/toplist
+ datestamp = datetime.fromtimestamp(
+ self.convert_milliseconds(info['updateTime'])).strftime('%Y-%m-%d')
+ name = '%s %s' % (name, datestamp)
+
+ entries = [
+ self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
+ 'NetEaseMusic', song['id'])
+ for song in info['tracks']
+ ]
+ return self.playlist_result(entries, list_id, name, desc)
+
+
+class NetEaseMusicMvIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:mv'
+ IE_DESC = '网易云音乐 - MV'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://music.163.com/#/mv?id=415350',
+ 'info_dict': {
+ 'id': '415350',
+ 'ext': 'mp4',
+ 'title': '이럴거면 그러지말지',
+ 'description': '白雅言自作曲唱甜蜜爱情',
+ 'creator': '白雅言',
+ 'upload_date': '20150520',
+ },
+ 'skip': 'Blocked outside Mainland China',
+ }
+
+ def _real_extract(self, url):
+ mv_id = self._match_id(url)
+
+ info = self.query_api(
+ 'mv/detail?id=%s&type=mp4' % mv_id,
+ mv_id, 'Downloading mv info')['data']
+
+ formats = [
+ {'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
+ for brs, mv_url in info['brs'].items()
+ ]
+ self._sort_formats(formats)
+
+ return {
+ 'id': mv_id,
+ 'title': info['name'],
+ 'description': info.get('desc') or info.get('briefDesc'),
+ 'creator': info['artistName'],
+ 'upload_date': info['publishTime'].replace('-', ''),
+ 'formats': formats,
+ 'thumbnail': info.get('cover'),
+ 'duration': self.convert_milliseconds(info.get('duration', 0)),
+ }
+
+
+class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:program'
+ IE_DESC = '网易云音乐 - 电台节目'
+    _VALID_URL = r'https?://music\.163\.com/(#/)?program\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://music.163.com/#/program?id=10109055',
+ 'info_dict': {
+ 'id': '10109055',
+ 'ext': 'mp3',
+ 'title': '不丹足球背后的故事',
+ 'description': '喜马拉雅人的足球梦 ...',
+ 'creator': '大话西藏',
+ 'timestamp': 1434179342,
+ 'upload_date': '20150613',
+ 'duration': 900,
+ },
+ 'skip': 'Blocked outside Mainland China',
+ }, {
+ 'note': 'This program has accompanying songs.',
+ 'url': 'http://music.163.com/#/program?id=10141022',
+ 'info_dict': {
+ 'id': '10141022',
+ 'title': '25岁,你是自在如风的少年<27°C>',
+ 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
+ },
+ 'playlist_count': 4,
+ 'skip': 'Blocked outside Mainland China',
+ }, {
+ 'note': 'This program has accompanying songs.',
+ 'url': 'http://music.163.com/#/program?id=10141022',
+ 'info_dict': {
+ 'id': '10141022',
+ 'ext': 'mp3',
+ 'title': '25岁,你是自在如风的少年<27°C>',
+ 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
+ 'timestamp': 1434450841,
+ 'upload_date': '20150616',
+ },
+ 'params': {
+ 'noplaylist': True
+ },
+ 'skip': 'Blocked outside Mainland China',
+ }]
+
+ def _real_extract(self, url):
+ program_id = self._match_id(url)
+
+ info = self.query_api(
+ 'dj/program/detail?id=%s' % program_id,
+ program_id, 'Downloading program info')['program']
+
+ name = info['name']
+ description = info['description']
+
+ if not info['songs'] or self._downloader.params.get('noplaylist'):
+ if info['songs']:
+ self.to_screen(
+ 'Downloading just the main audio %s because of --no-playlist'
+ % info['mainSong']['id'])
+
+ formats = self.extract_formats(info['mainSong'])
+ self._sort_formats(formats)
+
+ return {
+ 'id': program_id,
+ 'title': name,
+ 'description': description,
+ 'creator': info['dj']['brand'],
+ 'timestamp': self.convert_milliseconds(info['createTime']),
+ 'thumbnail': info['coverUrl'],
+ 'duration': self.convert_milliseconds(info.get('duration', 0)),
+ 'formats': formats,
+ }
+
+ self.to_screen(
+ 'Downloading playlist %s - add --no-playlist to just download the main audio %s'
+ % (program_id, info['mainSong']['id']))
+
+ song_ids = [info['mainSong']['id']]
+ song_ids.extend([song['id'] for song in info['songs']])
+ entries = [
+ self.url_result('http://music.163.com/#/song?id=%s' % song_id,
+ 'NetEaseMusic', song_id)
+ for song_id in song_ids
+ ]
+ return self.playlist_result(entries, program_id, name, description)
+
+
+class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
+ IE_NAME = 'netease:djradio'
+ IE_DESC = '网易云音乐 - 电台'
+ _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://music.163.com/#/djradio?id=42',
+ 'info_dict': {
+ 'id': '42',
+ 'title': '声音蔓延',
+ 'description': 'md5:766220985cbd16fdd552f64c578a6b15'
+ },
+ 'playlist_mincount': 40,
+ 'skip': 'Blocked outside Mainland China',
+ }
+ _PAGE_SIZE = 1000
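+    # the programs API is paged; the loop below keeps fetching _PAGE_SIZE
+    # entries per request until the response reports 'more': false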
+
+ def _real_extract(self, url):
+ dj_id = self._match_id(url)
+
+ name = None
+ desc = None
+ entries = []
+ for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE):
+ info = self.query_api(
+ 'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
+ % (self._PAGE_SIZE, dj_id, offset),
+ dj_id, 'Downloading dj programs - %d' % offset)
+
+ entries.extend([
+ self.url_result(
+ 'http://music.163.com/#/program?id=%s' % program['id'],
+ 'NetEaseMusicProgram', program['id'])
+ for program in info['programs']
+ ])
+
+ if name is None:
+ radio = info['programs'][0]['radio']
+ name = radio['name']
+ desc = radio['desc']
+
+ if not info['more']:
+ break
+
+ return self.playlist_result(entries, dj_id, name, desc)
diff --git a/hypervideo_dl/extractor/netzkino.py b/hypervideo_dl/extractor/netzkino.py
new file mode 100644
index 0000000..aec3026
--- /dev/null
+++ b/hypervideo_dl/extractor/netzkino.py
@@ -0,0 +1,89 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ js_to_json,
+ parse_iso8601,
+)
+
+
+class NetzkinoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
+ 'md5': '92a3f8b76f8d7220acce5377ea5d4873',
+ 'info_dict': {
+ 'id': 'rakete-zum-mond',
+ 'ext': 'mp4',
+ 'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
+ 'comments': 'mincount:3',
+ 'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
+ 'upload_date': '20120813',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'timestamp': 1344858571,
+ 'age_limit': 12,
+ },
+ 'params': {
+ 'skip_download': 'Download only works from Germany',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ category_id = mobj.group('category')
+ video_id = mobj.group('id')
+
+ api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
+ api_info = self._download_json(api_url, video_id)
+ info = next(
+ p for p in api_info['posts'] if p['slug'] == video_id)
+ custom_fields = info['custom_fields']
+
+ production_js = self._download_webpage(
+ 'http://www.netzkino.de/beta/dist/production.min.js', video_id,
+ note='Downloading player code')
+ avo_js = self._search_regex(
+ r'var urlTemplate=(\{.*?"\})',
+ production_js, 'URL templates')
+ templates = self._parse_json(
+ avo_js, video_id, transform_source=js_to_json)
+
+ suffix = {
+ 'hds': '.mp4/manifest.f4m',
+ 'hls': '.mp4/master.m3u8',
+ 'pmd': '.mp4',
+ }
+ film_fn = custom_fields['Streaming'][0]
+ formats = [{
+ 'format_id': key,
+ 'ext': 'mp4',
+ 'url': tpl.replace('{}', film_fn) + suffix[key],
+ } for key, tpl in templates.items()]
+ self._sort_formats(formats)
+
+ comments = [{
+ 'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
+ 'id': c['id'],
+ 'author': c['name'],
+ 'html': c['content'],
+ 'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
+ } for c in info.get('comments', [])]
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'comments': comments,
+ 'title': info['title'],
+            'age_limit': int_or_none(custom_fields.get('FSK', [None])[0]),
+ 'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
+ 'description': clean_html(info.get('content')),
+ 'thumbnail': info.get('thumbnail'),
+ 'playlist_title': api_info.get('title'),
+ 'playlist_id': category_id,
+ }
diff --git a/hypervideo_dl/extractor/newgrounds.py b/hypervideo_dl/extractor/newgrounds.py
new file mode 100644
index 0000000..82e7cf5
--- /dev/null
+++ b/hypervideo_dl/extractor/newgrounds.py
@@ -0,0 +1,168 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ parse_duration,
+ parse_filesize,
+ unified_timestamp,
+)
+
+
+class NewgroundsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.newgrounds.com/audio/listen/549479',
+ 'md5': 'fe6033d297591288fa1c1f780386f07a',
+ 'info_dict': {
+ 'id': '549479',
+ 'ext': 'mp3',
+ 'title': 'B7 - BusMode',
+ 'uploader': 'Burn7',
+ 'timestamp': 1378878540,
+ 'upload_date': '20130911',
+ 'duration': 143,
+ },
+ }, {
+ 'url': 'https://www.newgrounds.com/portal/view/673111',
+ 'md5': '3394735822aab2478c31b1004fe5e5bc',
+ 'info_dict': {
+ 'id': '673111',
+ 'ext': 'mp4',
+ 'title': 'Dancin',
+ 'uploader': 'Squirrelman82',
+ 'timestamp': 1460256780,
+ 'upload_date': '20160410',
+ },
+ }, {
+ # source format unavailable, additional mp4 formats
+ 'url': 'http://www.newgrounds.com/portal/view/689400',
+ 'info_dict': {
+ 'id': '689400',
+ 'ext': 'mp4',
+ 'title': 'ZTV News Episode 8',
+ 'uploader': 'BennettTheSage',
+ 'timestamp': 1487965140,
+ 'upload_date': '20170224',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, media_id)
+
+ title = self._html_search_regex(
+ r'<title>([^>]+)</title>', webpage, 'title')
+
+        media_url = self._parse_json(self._search_regex(
+            r'"url"\s*:\s*("[^"]+"),', webpage, 'media url'), media_id)
+
+ formats = [{
+ 'url': media_url,
+ 'format_id': 'source',
+ 'quality': 1,
+ }]
+
+ max_resolution = int_or_none(self._search_regex(
+ r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
+ default=None))
+ if max_resolution:
+ url_base = media_url.rpartition('.')[0]
+ for resolution in (360, 720, 1080):
+ if resolution > max_resolution:
+ break
+ formats.append({
+ 'url': '%s.%dp.mp4' % (url_base, resolution),
+ 'format_id': '%dp' % resolution,
+ 'height': resolution,
+ })
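+        # e.g. a media URL ending in 'video.mp4' with max_resolution 720 also
+        # yields 'video.360p.mp4' and 'video.720p.mp4' candidates; the
+        # _check_formats() call below drops any that do not actually exist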
+
+ self._check_formats(formats, media_id)
+ self._sort_formats(formats)
+
+ uploader = self._html_search_regex(
+ (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
+ r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
+ fatal=False)
+
+ timestamp = unified_timestamp(self._html_search_regex(
+ (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
+ r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
+ default=None))
+ duration = parse_duration(self._search_regex(
+ r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage,
+ 'duration', default=None))
+
+ filesize_approx = parse_filesize(self._html_search_regex(
+ r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize',
+ default=None))
+ if len(formats) == 1:
+ formats[0]['filesize_approx'] = filesize_approx
+
+ if '<dd>Song' in webpage:
+ formats[0]['vcodec'] = 'none'
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'uploader': uploader,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class NewgroundsPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.newgrounds.com/collection/cats',
+ 'info_dict': {
+ 'id': 'cats',
+ 'title': 'Cats',
+ },
+ 'playlist_mincount': 46,
+ }, {
+ 'url': 'http://www.newgrounds.com/portal/search/author/ZONE-SAMA',
+ 'info_dict': {
+ 'id': 'ZONE-SAMA',
+ 'title': 'Portal Search: ZONE-SAMA',
+ },
+ 'playlist_mincount': 47,
+ }, {
+ 'url': 'http://www.newgrounds.com/audio/search/title/cats',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ title = self._search_regex(
+ r'<title>([^>]+)</title>', webpage, 'title', default=None)
+
+        # cut off the left-hand menu so only the submissions column is scanned
+ webpage = self._search_regex(
+ r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
+ webpage, 'wide column', default=webpage)
+
+ entries = []
+ for a, path, media_id in re.findall(
+ r'(<a[^>]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)',
+ webpage):
+ a_class = extract_attributes(a).get('class')
+ if a_class not in ('item-portalsubmission', 'item-audiosubmission'):
+ continue
+ entries.append(
+ self.url_result(
+ 'https://www.newgrounds.com/%s' % path,
+ ie=NewgroundsIE.ie_key(), video_id=media_id))
+
+ return self.playlist_result(entries, playlist_id, title)
diff --git a/hypervideo_dl/extractor/newstube.py b/hypervideo_dl/extractor/newstube.py
new file mode 100644
index 0000000..dab4aec
--- /dev/null
+++ b/hypervideo_dl/extractor/newstube.py
@@ -0,0 +1,83 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import hashlib
+
+from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt
+from ..utils import (
+ bytes_to_intlist,
+ int_or_none,
+ intlist_to_bytes,
+ parse_codecs,
+ parse_duration,
+)
+
+
+class NewstubeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
+ _TEST = {
+ 'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
+ 'md5': '9d10320ad473444352f72f746ccb8b8c',
+ 'info_dict': {
+ 'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
+ 'ext': 'mp4',
+ 'title': 'Телеканал CNN переместил город Славянск в Крым',
+ 'description': 'md5:419a8c9f03442bc0b0a794d689360335',
+ 'duration': 31.05,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ page = self._download_webpage(url, video_id)
+ title = self._html_search_meta(['og:title', 'twitter:title'], page, fatal=True)
+
+ video_guid = self._html_search_regex(
+ r'<meta\s+property="og:video(?::(?:(?:secure_)?url|iframe))?"\s+content="https?://(?:www\.)?newstube\.ru/embed/(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+ page, 'video GUID')
+
+ enc_data = base64.b64decode(self._download_webpage(
+ 'https://www.newstube.ru/embed/api/player/getsources2',
+ video_guid, query={
+ 'guid': video_guid,
+ 'ff': 3,
+ }))
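+        # payload layout, as consumed below: bytes 0-15 are the PBKDF2 salt,
+        # bytes 16-31 the AES-CBC IV, the rest the ciphertext; the final
+        # plaintext byte gives the PKCS#7 padding length that is stripped off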
+ key = hashlib.pbkdf2_hmac(
+ 'sha1', video_guid.replace('-', '').encode(), enc_data[:16], 1)[:16]
+ dec_data = aes_cbc_decrypt(
+ bytes_to_intlist(enc_data[32:]), bytes_to_intlist(key),
+ bytes_to_intlist(enc_data[16:32]))
+ sources = self._parse_json(intlist_to_bytes(dec_data[:-dec_data[-1]]), video_guid)
+
+ formats = []
+ for source in sources:
+ source_url = source.get('Src')
+ if not source_url:
+ continue
+ height = int_or_none(source.get('Height'))
+ f = {
+ 'format_id': 'http' + ('-%dp' % height if height else ''),
+ 'url': source_url,
+ 'width': int_or_none(source.get('Width')),
+ 'height': height,
+ }
+ source_type = source.get('Type')
+ if source_type:
+ f.update(parse_codecs(self._search_regex(
+ r'codecs="([^"]+)"', source_type, 'codecs', fatal=False)))
+ formats.append(f)
+
+ self._check_formats(formats, video_guid)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_guid,
+ 'title': title,
+ 'description': self._html_search_meta(['description', 'og:description'], page),
+ 'thumbnail': self._html_search_meta(['og:image:secure_url', 'og:image', 'twitter:image'], page),
+ 'duration': parse_duration(self._html_search_meta('duration', page)),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/nextmedia.py b/hypervideo_dl/extractor/nextmedia.py
new file mode 100644
index 0000000..7bd1290
--- /dev/null
+++ b/hypervideo_dl/extractor/nextmedia.py
@@ -0,0 +1,238 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ clean_html,
+ get_element_by_class,
+ int_or_none,
+ parse_iso8601,
+ remove_start,
+ unified_timestamp,
+)
+
+
+class NextMediaIE(InfoExtractor):
+ IE_DESC = '蘋果日報'
+ _VALID_URL = r'https?://hk\.apple\.nextmedia\.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
+ 'md5': 'dff9fad7009311c421176d1ac90bfe4f',
+ 'info_dict': {
+ 'id': '53109199',
+ 'ext': 'mp4',
+ 'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:28222b9912b6665a21011b034c70fcc7',
+ 'timestamp': 1415456273,
+ 'upload_date': '20141108',
+ }
+ }]
+
+ _URL_PATTERN = r'\{ url: \'(.+)\' \}'
+
+ def _real_extract(self, url):
+ news_id = self._match_id(url)
+ page = self._download_webpage(url, news_id)
+ return self._extract_from_nextmedia_page(news_id, url, page)
+
+ def _extract_from_nextmedia_page(self, news_id, url, page):
+ redirection_url = self._search_regex(
+ r'window\.location\.href\s*=\s*([\'"])(?P<url>(?!\1).+)\1',
+ page, 'redirection URL', default=None, group='url')
+ if redirection_url:
+ return self.url_result(compat_urlparse.urljoin(url, redirection_url))
+
+ title = self._fetch_title(page)
+ video_url = self._search_regex(self._URL_PATTERN, page, 'video url')
+
+ attrs = {
+ 'id': news_id,
+ 'title': title,
+ 'url': video_url, # ext can be inferred from url
+ 'thumbnail': self._fetch_thumbnail(page),
+ 'description': self._fetch_description(page),
+ }
+
+ timestamp = self._fetch_timestamp(page)
+ if timestamp:
+ attrs['timestamp'] = timestamp
+ else:
+ attrs['upload_date'] = self._fetch_upload_date(url)
+
+ return attrs
+
+ def _fetch_title(self, page):
+ return self._og_search_title(page)
+
+ def _fetch_thumbnail(self, page):
+ return self._og_search_thumbnail(page)
+
+    def _fetch_timestamp(self, page):
+        date_created = self._search_regex(
+            '"dateCreated":"([^"]+)"', page, 'created time')
+        return parse_iso8601(date_created)
+
+ def _fetch_upload_date(self, url):
+ return self._search_regex(self._VALID_URL, url, 'upload date', group='date')
+
+ def _fetch_description(self, page):
+ return self._og_search_property('description', page)
+
+
+class NextMediaActionNewsIE(NextMediaIE):
+ IE_DESC = '蘋果日報 - 動新聞'
+ _VALID_URL = r'https?://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
+ _TESTS = [{
+ 'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
+ 'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
+ 'info_dict': {
+ 'id': '19009428',
+ 'ext': 'mp4',
+ 'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659',
+ 'timestamp': 1421791200,
+ 'upload_date': '20150120',
+ }
+ }]
+
+ def _real_extract(self, url):
+ news_id = self._match_id(url)
+ actionnews_page = self._download_webpage(url, news_id)
+ article_url = self._og_search_url(actionnews_page)
+ article_page = self._download_webpage(article_url, news_id)
+ return self._extract_from_nextmedia_page(news_id, url, article_page)
+
+
+class AppleDailyIE(NextMediaIE):
+ IE_DESC = '臺灣蘋果日報'
+ _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
+ _TESTS = [{
+ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
+ 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
+ 'info_dict': {
+ 'id': '36354694',
+ 'ext': 'mp4',
+ 'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4',
+ 'upload_date': '20150128',
+ }
+ }, {
+ 'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A',
+ 'md5': '86b4e9132d158279c7883822d94ccc49',
+ 'info_dict': {
+ 'id': '550549',
+ 'ext': 'mp4',
+ 'title': '不滿被踩腳 山東兩大媽一路打下車',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:175b4260c1d7c085993474217e4ab1b4',
+ 'upload_date': '20150128',
+ }
+ }, {
+ 'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
+ 'md5': '03df296d95dedc2d5886debbb80cb43f',
+ 'info_dict': {
+ 'id': '5003671',
+ 'ext': 'mp4',
+ 'title': '20正妹熱舞 《刀龍傳說Online》火辣上市',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd',
+ 'upload_date': '20150128',
+ },
+ 'skip': 'redirect to http://www.appledaily.com.tw/animation/',
+ }, {
+ # No thumbnail
+ 'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/',
+ 'md5': 'b06182cd386ea7bc6115ec7ff0f72aeb',
+ 'info_dict': {
+ 'id': '5003673',
+ 'ext': 'mp4',
+ 'title': '半夜尿尿 好像會看到___',
+ 'description': 'md5:61d2da7fe117fede148706cdb85ac066',
+ 'upload_date': '20150128',
+ },
+ 'expected_warnings': [
+ 'video thumbnail',
+ ],
+ 'skip': 'redirect to http://www.appledaily.com.tw/animation/',
+ }, {
+ 'url': 'http://www.appledaily.com.tw/appledaily/article/supplement/20140417/35770334/',
+ 'md5': 'eaa20e6b9df418c912d7f5dec2ba734d',
+ 'info_dict': {
+ 'id': '35770334',
+ 'ext': 'mp4',
+ 'title': '咖啡占卜測 XU裝熟指數',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748',
+ 'upload_date': '20140417',
+ },
+ }, {
+ 'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
+ 'only_matching': True,
+ }, {
+ # Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694
+ 'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694',
+ 'only_matching': True,
+ }]
+
+ _URL_PATTERN = r'\{url: \'(.+)\'\}'
+
+ def _fetch_title(self, page):
+ return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None)
+ or self._html_search_meta('description', page, 'news title'))
+
+ def _fetch_thumbnail(self, page):
+ return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
+
+ def _fetch_timestamp(self, page):
+ return None
+
+ def _fetch_description(self, page):
+ return self._html_search_meta('description', page, 'news description')
+
+
+class NextTVIE(InfoExtractor):
+ IE_DESC = '壹電視'
+ _VALID_URL = r'https?://(?:www\.)?nexttv\.com\.tw/(?:[^/]+/)+(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.nexttv.com.tw/news/realtime/politics/11779671',
+ 'info_dict': {
+ 'id': '11779671',
+ 'ext': 'mp4',
+ 'title': '「超收稅」近4千億! 藍議員籲發消費券',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1484825400,
+ 'upload_date': '20170119',
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h1[^>]*>([^<]+)</h1>', webpage, 'title')
+
+ data = self._hidden_inputs(webpage)
+
+ video_url = data['ntt-vod-src-detailview']
+
+ date_str = get_element_by_class('date', webpage)
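+        # dates on the page are Taiwan local time, hence the fixed +0800 offset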
+ timestamp = unified_timestamp(date_str + '+0800') if date_str else None
+
+ view_count = int_or_none(remove_start(
+ clean_html(get_element_by_class('click', webpage)), '點閱:'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': data.get('ntt-vod-img-src'),
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ }
diff --git a/hypervideo_dl/extractor/nexx.py b/hypervideo_dl/extractor/nexx.py
new file mode 100644
index 0000000..586c1b7
--- /dev/null
+++ b/hypervideo_dl/extractor/nexx.py
@@ -0,0 +1,453 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import random
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class NexxIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
+ nexx:(?:(?P<domain_id_s>\d+):)?|
+ https?://arc\.nexx\.cloud/api/video/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ # movie
+ 'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
+ 'md5': '31899fd683de49ad46f4ee67e53e83fe',
+ 'info_dict': {
+ 'id': '128907',
+ 'ext': 'mp4',
+ 'title': 'Stiftung Warentest',
+ 'alt_title': 'Wie ein Test abläuft',
+ 'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
+ 'creator': 'SPIEGEL TV',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2509,
+ 'timestamp': 1384264416,
+ 'upload_date': '20131112',
+ },
+ }, {
+ # episode
+ 'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858',
+ 'info_dict': {
+ 'id': '247858',
+ 'ext': 'mp4',
+ 'title': 'Return of the Golden Child (OV)',
+ 'description': 'md5:5d969537509a92b733de21bae249dc63',
+ 'release_year': 2017,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1397,
+ 'timestamp': 1495033267,
+ 'upload_date': '20170517',
+ 'episode_number': 2,
+ 'season_number': 2,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ }, {
+ # does not work via arc
+ 'url': 'nexx:741:1269984',
+ 'md5': 'c714b5b238b2958dc8d5642addba6886',
+ 'info_dict': {
+ 'id': '1269984',
+ 'ext': 'mp4',
+ 'title': '1 TAG ohne KLO... wortwörtlich! 😑',
+ 'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 607,
+ 'timestamp': 1518614955,
+ 'upload_date': '20180214',
+ },
+ }, {
+ # free cdn from http://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html
+ 'url': 'nexx:747:1533779',
+ 'md5': '6bf6883912b82b7069fb86c2297e9893',
+ 'info_dict': {
+ 'id': '1533779',
+ 'ext': 'mp4',
+ 'title': 'Aufregung um ausgebrochene Raubtiere',
+ 'alt_title': 'Eifel-Zoo',
+ 'description': 'md5:f21375c91c74ad741dcb164c427999d2',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 111,
+ 'timestamp': 1527874460,
+ 'upload_date': '20180601',
+ },
+ }, {
+ 'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
+ 'only_matching': True,
+ }, {
+ 'url': 'nexx:748:128907',
+ 'only_matching': True,
+ }, {
+ 'url': 'nexx:128907',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://arc.nexx.cloud/api/video/128907.json',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_domain_id(webpage):
+ mobj = re.search(
+ r'<script\b[^>]+\bsrc=["\'](?:https?:)?//(?:require|arc)\.nexx(?:\.cloud|cdn\.com)/(?:sdk/)?(?P<id>\d+)',
+ webpage)
+ return mobj.group('id') if mobj else None
+
+ @staticmethod
+ def _extract_urls(webpage):
+ # Reference:
+ # 1. https://nx-s.akamaized.net/files/201510/44.pdf
+
+ entries = []
+
+ # JavaScript Integration
+ domain_id = NexxIE._extract_domain_id(webpage)
+ if domain_id:
+ for video_id in re.findall(
+ r'(?is)onPLAYReady.+?_play\.(?:init|(?:control\.)?addPlayer)\s*\(.+?\s*,\s*["\']?(\d+)',
+ webpage):
+ entries.append(
+ 'https://api.nexx.cloud/v3/%s/videos/byid/%s'
+ % (domain_id, video_id))
+
+ # TODO: support more embed formats
+
+ return entries
+
+ @staticmethod
+ def _extract_url(webpage):
+ return NexxIE._extract_urls(webpage)[0]
+
+ def _handle_error(self, response):
+ status = int_or_none(try_get(
+ response, lambda x: x['metadata']['status']) or 200)
+ if 200 <= status < 300:
+ return
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']),
+ expected=True)
+
+ def _call_api(self, domain_id, path, video_id, data=None, headers={}):
+ headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
+ result = self._download_json(
+ 'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id,
+ 'Downloading %s JSON' % path, data=urlencode_postdata(data),
+ headers=headers)
+ self._handle_error(result)
+ return result['result']
+
+ def _extract_free_formats(self, video, video_id):
+ stream_data = video['streamdata']
+ cdn = stream_data['cdnType']
+ assert cdn == 'free'
+
+ hash = video['general']['hash']
+
+ ps = compat_str(stream_data['originalDomain'])
+ if stream_data['applyFolderHierarchy'] == 1:
+ s = ('%04d' % int(video_id))[::-1]
+ ps += '/%s/%s' % (s[0:2], s[2:4])
+ ps += '/%s/%s_' % (video_id, hash)
+
+ t = 'http://%s' + ps
+ fd = stream_data['azureFileDistribution'].split(',')
+ cdn_provider = stream_data['cdnProvider']
+
+ def p0(p):
+ return '_%s' % p if stream_data['applyAzureStructure'] == 1 else ''
+
+ formats = []
+ if cdn_provider == 'ak':
+ t += ','
+ for i in fd:
+ p = i.split(':')
+ t += p[1] + p0(int(p[0])) + ','
+ t += '.mp4.csmil/master.%s'
+ elif cdn_provider == 'ce':
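+            # 't' is still 'http://%s<path>/<hash>_'; split off the trailing
+            # file prefix 'h' and reuse the directory part both as the
+            # progressive HTTP base and as the manifest URL template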
+ k = t.split('/')
+ h = k.pop()
+ http_base = t = '/'.join(k)
+ http_base = http_base % stream_data['cdnPathHTTP']
+ t += '/asset.ism/manifest.%s?dcp_ver=aos4&videostream='
+ for i in fd:
+ p = i.split(':')
+ tbr = int(p[0])
+ filename = '%s%s%s.mp4' % (h, p[1], p0(tbr))
+ f = {
+ 'url': http_base + '/' + filename,
+ 'format_id': '%s-http-%d' % (cdn, tbr),
+ 'tbr': tbr,
+ }
+ width_height = p[1].split('x')
+ if len(width_height) == 2:
+ f.update({
+ 'width': int_or_none(width_height[0]),
+ 'height': int_or_none(width_height[1]),
+ })
+ formats.append(f)
+ a = filename + ':%s' % (tbr * 1000)
+ t += a + ','
+ t = t[:-1] + '&audiostream=' + a.split(':')[0]
+ else:
+            assert False, 'Unsupported cdnProvider %s' % cdn_provider
+
+ if cdn_provider == 'ce':
+ formats.extend(self._extract_mpd_formats(
+ t % (stream_data['cdnPathDASH'], 'mpd'), video_id,
+ mpd_id='%s-dash' % cdn, fatal=False))
+ formats.extend(self._extract_m3u8_formats(
+ t % (stream_data['cdnPathHLS'], 'm3u8'), video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='%s-hls' % cdn, fatal=False))
+
+ return formats
+
+ def _extract_azure_formats(self, video, video_id):
+ stream_data = video['streamdata']
+ cdn = stream_data['cdnType']
+ assert cdn == 'azure'
+
+ azure_locator = stream_data['azureLocator']
+
+ def get_cdn_shield_base(shield_type='', static=False):
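+            # e.g. (hypothetical account) 'nexxplayplus2' without a CDN shield
+            # maps to 'http://nx-p02.akamaized.net/' ('nx-d02' for static assets)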
+ for secure in ('', 's'):
+ cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
+ if cdn_shield:
+ return 'http%s://%s' % (secure, cdn_shield)
+ else:
+ if 'fb' in stream_data['azureAccount']:
+ prefix = 'df' if static else 'f'
+ else:
+ prefix = 'd' if static else 'p'
+ account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
+ return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
+
+ language = video['general'].get('language_raw') or ''
+
+ azure_stream_base = get_cdn_shield_base()
+ is_ml = ',' in language
+ azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
+ azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
+
+ protection_token = try_get(
+ video, lambda x: x['protectiondata']['token'], compat_str)
+ if protection_token:
+ azure_manifest_url += '?hdnts=%s' % protection_token
+
+ formats = self._extract_m3u8_formats(
+ azure_manifest_url % '(format=m3u8-aapl)',
+ video_id, 'mp4', 'm3u8_native',
+ m3u8_id='%s-hls' % cdn, fatal=False)
+ formats.extend(self._extract_mpd_formats(
+ azure_manifest_url % '(format=mpd-time-csf)',
+ video_id, mpd_id='%s-dash' % cdn, fatal=False))
+ formats.extend(self._extract_ism_formats(
+ azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
+
+ azure_progressive_base = get_cdn_shield_base('Prog', True)
+ azure_file_distribution = stream_data.get('azureFileDistribution')
+ if azure_file_distribution:
+ fds = azure_file_distribution.split(',')
+ if fds:
+ for fd in fds:
+ ss = fd.split(':')
+ if len(ss) == 2:
+ tbr = int_or_none(ss[0])
+ if tbr:
+ f = {
+ 'url': '%s%s/%s_src_%s_%d.mp4' % (
+ azure_progressive_base, azure_locator, video_id, ss[1], tbr),
+ 'format_id': '%s-http-%d' % (cdn, tbr),
+ 'tbr': tbr,
+ }
+ width_height = ss[1].split('x')
+ if len(width_height) == 2:
+ f.update({
+ 'width': int_or_none(width_height[0]),
+ 'height': int_or_none(width_height[1]),
+ })
+ formats.append(f)
+
+ return formats
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
+ video_id = mobj.group('id')
+
+ video = None
+
+ def find_video(result):
+ if isinstance(result, dict):
+ return result
+ elif isinstance(result, list):
+ vid = int(video_id)
+ for v in result:
+ if try_get(v, lambda x: x['general']['ID'], int) == vid:
+ return v
+ return None
+
+ response = self._download_json(
+ 'https://arc.nexx.cloud/api/video/%s.json' % video_id,
+ video_id, fatal=False)
+ if response and isinstance(response, dict):
+ result = response.get('result')
+ if result:
+ video = find_video(result)
+
+ # not all videos work via arc, e.g. nexx:741:1269984
+ if not video:
+ # Reverse engineered from JS code (see getDeviceID function)
+ device_id = '%d:%d:%d%d' % (
+ random.randint(1, 4), int(time.time()),
+                random.randint(10000, 99999), random.randint(1, 9))
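+            # e.g. device_id == '3:1623270000:543217'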
+
+ result = self._call_api(domain_id, 'session/init', video_id, data={
+ 'nxp_devh': device_id,
+ 'nxp_userh': '',
+ 'precid': '0',
+ 'playlicense': '0',
+ 'screenx': '1920',
+ 'screeny': '1080',
+ 'playerversion': '6.0.00',
+ 'gateway': 'html5',
+ 'adGateway': '',
+ 'explicitlanguage': 'en-US',
+ 'addTextTemplates': '1',
+ 'addDomainData': '1',
+ 'addAdModel': '1',
+ }, headers={
+ 'X-Request-Enable-Auth-Fallback': '1',
+ })
+
+ cid = result['general']['cid']
+
+            # As described in [1], the X-Request-Token generation algorithm is
+            # as follows:
+            #   md5( operation + domain_id + domain_secret )
+            # where domain_secret is a static value that will be given by nexx.tv
+            # as per [1]. Here is how this "secret" is actually generated (reversed
+            # from the _play.api.init function, search for clienttoken), so it is
+            # neither truly static nor much of a secret.
+            # 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
+ secret = result['device']['clienttoken'][int(device_id[0]):]
+ secret = secret[0:len(secret) - int(device_id[-1])]
+
+ op = 'byid'
+
+ # Reversed from JS code for _play.api.call function (search for
+ # X-Request-Token)
+ request_token = hashlib.md5(
+ ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
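+            # e.g. (hypothetical secret) op = 'byid', domain_id = '748',
+            # secret = 'abc' -> md5(b'byid748abc').hexdigest()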
+
+ result = self._call_api(
+ domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
+ 'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
+ 'addInteractionOptions': '1',
+ 'addStatusDetails': '1',
+ 'addStreamDetails': '1',
+ 'addCaptions': '1',
+ 'addScenes': '1',
+ 'addHotSpots': '1',
+ 'addBumpers': '1',
+ 'captionFormat': 'data',
+ }, headers={
+ 'X-Request-CID': cid,
+ 'X-Request-Token': request_token,
+ })
+ video = find_video(result)
+
+ general = video['general']
+ title = general['title']
+
+ cdn = video['streamdata']['cdnType']
+
+ if cdn == 'azure':
+ formats = self._extract_azure_formats(video, video_id)
+ elif cdn == 'free':
+ formats = self._extract_free_formats(video, video_id)
+ else:
+            # TODO: reverse more cdns
+            assert False, 'Unsupported cdnType %s' % cdn
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'alt_title': general.get('subtitle'),
+ 'description': general.get('description'),
+ 'release_year': int_or_none(general.get('year')),
+ 'creator': general.get('studio') or general.get('studio_adref'),
+ 'thumbnail': try_get(
+ video, lambda x: x['imagedata']['thumb'], compat_str),
+ 'duration': parse_duration(general.get('runtime')),
+ 'timestamp': int_or_none(general.get('uploaded')),
+ 'episode_number': int_or_none(try_get(
+ video, lambda x: x['episodedata']['episode'])),
+ 'season_number': int_or_none(try_get(
+ video, lambda x: x['episodedata']['season'])),
+ 'formats': formats,
+ }
+
+
+class NexxEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:video/)?(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
+ 'md5': '16746bfc28c42049492385c989b26c4a',
+ 'info_dict': {
+ 'id': '161464',
+ 'ext': 'mp4',
+ 'title': 'Nervenkitzel Achterbahn',
+ 'alt_title': 'Karussellbauer in Deutschland',
+ 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
+ 'creator': 'SPIEGEL TV',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2761,
+ 'timestamp': 1394021479,
+ 'upload_date': '20140305',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://embed.nexx.cloud/11888/video/DSRTO7UVOX06S7',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ # Reference:
+ # 1. https://nx-s.akamaized.net/files/201510/44.pdf
+
+ # iFrame Embed Integration
+ return [mobj.group('url') for mobj in re.finditer(
+ r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ embed_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, embed_id)
+
+ return self.url_result(NexxIE._extract_url(webpage), ie=NexxIE.ie_key())
diff --git a/hypervideo_dl/extractor/nfl.py b/hypervideo_dl/extractor/nfl.py
new file mode 100644
index 0000000..871923e
--- /dev/null
+++ b/hypervideo_dl/extractor/nfl.py
@@ -0,0 +1,160 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ get_element_by_class,
+)
+
+
+class NFLBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'''(?x)
+ https?://
+ (?P<host>
+ (?:www\.)?
+ (?:
+ (?:
+ nfl|
+ buffalobills|
+ miamidolphins|
+ patriots|
+ newyorkjets|
+ baltimoreravens|
+ bengals|
+ clevelandbrowns|
+ steelers|
+ houstontexans|
+ colts|
+ jaguars|
+ (?:titansonline|tennesseetitans)|
+ denverbroncos|
+ (?:kc)?chiefs|
+ raiders|
+ chargers|
+ dallascowboys|
+ giants|
+ philadelphiaeagles|
+ (?:redskins|washingtonfootball)|
+ chicagobears|
+ detroitlions|
+ packers|
+ vikings|
+ atlantafalcons|
+ panthers|
+ neworleanssaints|
+ buccaneers|
+ azcardinals|
+ (?:stlouis|the)rams|
+ 49ers|
+ seahawks
+ )\.com|
+ .+?\.clubs\.nfl\.com
+ )
+ )/
+ '''
+ _VIDEO_CONFIG_REGEX = r'<script[^>]+id="[^"]*video-config-[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}[^"]*"[^>]*>\s*({.+})'
+ _WORKING = False
+
+ def _parse_video_config(self, video_config, display_id):
+ video_config = self._parse_json(video_config, display_id)
+ item = video_config['playlist'][0]
+ mcp_id = item.get('mcpID')
+ if mcp_id:
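+ # Delegate to the Anvato extractor; the long token appears to be
+ # NFL's Anvato access key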
+ info = self.url_result(
+ 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:' + mcp_id,
+ 'Anvato', mcp_id)
+ else:
+ media_id = item.get('id') or item['entityId']
+ title = item['title']
+ item_url = item['url']
+ info = {'id': media_id}
+ ext = determine_ext(item_url)
+ if ext == 'm3u8':
+ info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4')
+ self._sort_formats(info['formats'])
+ else:
+ info['url'] = item_url
+ if item.get('audio') is True:
+ info['vcodec'] = 'none'
+ is_live = video_config.get('live') is True
+ thumbnails = None
+ image_url = item.get(item.get('imageSrc')) or item.get(item.get('posterImage'))
+ if image_url:
+ thumbnails = [{
+ 'url': image_url,
+ 'ext': determine_ext(image_url, 'jpg'),
+ }]
+ info.update({
+ 'title': self._live_title(title) if is_live else title,
+ 'is_live': is_live,
+ 'description': clean_html(item.get('description')),
+ 'thumbnails': thumbnails,
+ })
+ return info
+
+
+class NFLIE(NFLBaseIE):
+ IE_NAME = 'nfl.com'
+ _VALID_URL = NFLBaseIE._VALID_URL_BASE + r'(?:videos?|listen|audio)/(?P<id>[^/#?&]+)'
+ _TESTS = [{
+ 'url': 'https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14',
+ 'info_dict': {
+ 'id': '899441',
+ 'ext': 'mp4',
+ 'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
+ 'description': 'md5:85e05a3cc163f8c344340f220521136d',
+ 'upload_date': '20201215',
+ 'timestamp': 1608009755,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'NFL',
+ }
+ }, {
+ 'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
+ 'md5': '6886b32c24b463038c760ceb55a34566',
+ 'info_dict': {
+ 'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99',
+ 'ext': 'mp3',
+ 'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
+ 'description': 'md5:12ada8ee70e6762658c30e223e095075',
+ }
+ }, {
+ 'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.raiders.com/audio/instant-reactions-raiders-week-14-loss-to-indianapolis-colts-espn-jason-fitz',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ return self._parse_video_config(self._search_regex(
+ self._VIDEO_CONFIG_REGEX, webpage, 'video config'), display_id)
+
+
+class NFLArticleIE(NFLBaseIE):
+ IE_NAME = 'nfl.com:article'
+ _VALID_URL = NFLBaseIE._VALID_URL_BASE + r'news/(?P<id>[^/#?&]+)'
+ _TEST = {
+ 'url': 'https://www.buffalobills.com/news/the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e',
+ 'info_dict': {
+ 'id': 'the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e',
+ 'title': "'The only thing we've earned is the noise' | Bills coaches discuss handling rising expectations",
+ },
+ 'playlist_count': 4,
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ entries = []
+ for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
+ entries.append(self._parse_video_config(video_config, display_id))
+ title = clean_html(get_element_by_class(
+ 'nfl-c-article__title', webpage)) or self._html_search_meta(
+ ['og:title', 'twitter:title'], webpage)
+ return self.playlist_result(entries, display_id, title)
diff --git a/hypervideo_dl/extractor/nhk.py b/hypervideo_dl/extractor/nhk.py
new file mode 100644
index 0000000..8a9331a
--- /dev/null
+++ b/hypervideo_dl/extractor/nhk.py
@@ -0,0 +1,178 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import urljoin
+
+
+class NhkBaseIE(InfoExtractor):
+ _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json'
+ _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
+ _TYPE_REGEX = r'/(?P<type>video|audio)/'
+
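+ # The template expands to URLs such as (English video episode):
+ # https://api.nhk.or.jp/nhkworld/vodesdlist/v7a/episode/2015173/en/all/all.json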
+ def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
+ return self._download_json(
+ self._API_URL_TEMPLATE % (
+ 'v' if is_video else 'r',
+ 'clip' if is_clip else 'esd',
+ 'episode' if is_episode else 'program',
+ m_id, lang, '/all' if is_video else ''),
+ m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []
+
+ def _extract_episode_info(self, url, episode=None):
+ fetch_episode = episode is None
+ lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
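+ # 7-digit ids are a 4-digit program id followed by a 3-digit
+ # episode number; the API expects them hyphenated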
+ if episode_id.isdigit():
+ episode_id = episode_id[:4] + '-' + episode_id[4:]
+
+ is_video = m_type == 'video'
+ if fetch_episode:
+ episode = self._call_api(
+ episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]
+ title = episode.get('sub_title_clean') or episode['sub_title']
+
+ def get_clean_field(key):
+ return episode.get(key + '_clean') or episode.get(key)
+
+ series = get_clean_field('title')
+
+ thumbnails = []
+ for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
+ img_path = episode.get('image' + s)
+ if not img_path:
+ continue
+ thumbnails.append({
+ 'id': '%dp' % h,
+ 'height': h,
+ 'width': w,
+ 'url': 'https://www3.nhk.or.jp' + img_path,
+ })
+
+ info = {
+ 'id': episode_id + '-' + lang,
+ 'title': '%s - %s' % (series, title) if series and title else title,
+ 'description': get_clean_field('description'),
+ 'thumbnails': thumbnails,
+ 'series': series,
+ 'episode': title,
+ }
+ if is_video:
+ vod_id = episode['vod_id']
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': 'Piksel',
+ 'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + vod_id,
+ 'id': vod_id,
+ })
+ else:
+ if fetch_episode:
+ audio_path = episode['audio']['audio']
+ info['formats'] = self._extract_m3u8_formats(
+ 'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
+ episode_id, 'm4a', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ for f in info['formats']:
+ f['language'] = lang
+ else:
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': NhkVodIE.ie_key(),
+ 'url': url,
+ })
+ return info
+
+
+class NhkVodIE(NhkBaseIE):
+ _VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
+ # Content available only for a limited period of time. Visit
+ # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
+ _TESTS = [{
+ # video clip
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
+ 'md5': '7a90abcfe610ec22a6bfe15bd46b30ca',
+ 'info_dict': {
+ 'id': 'a95j5iza',
+ 'ext': 'mp4',
+ 'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
+ 'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
+ 'timestamp': 1565965194,
+ 'upload_date': '20190816',
+ },
+ }, {
+ # audio clip
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/r_inventions-20201104-1/',
+ 'info_dict': {
+ 'id': 'r_inventions-20201104-1-en',
+ 'ext': 'm4a',
+ 'title': "Japan's Top Inventions - Miniature Video Cameras",
+ 'description': 'md5:07ea722bdbbb4936fdd360b6a480c25b',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self._extract_episode_info(url)
+
+
+class NhkVodProgramIE(NhkBaseIE):
+ _VALID_URL = r'%s/program%s(?P<id>[0-9a-z]+)(?:.+?\btype=(?P<episode_type>clip|(?:radio|tv)Episode))?' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
+ _TESTS = [{
+ # video program episodes
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway',
+ 'info_dict': {
+ 'id': 'japanrailway',
+ 'title': 'Japan Railway Journal',
+ },
+ 'playlist_mincount': 1,
+ }, {
+ # video program clips
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway/?type=clip',
+ 'info_dict': {
+ 'id': 'japanrailway',
+ 'title': 'Japan Railway Journal',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/10yearshayaomiyazaki/',
+ 'only_matching': True,
+ }, {
+ # audio program
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/audio/listener/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ lang, m_type, program_id, episode_type = re.match(self._VALID_URL, url).groups()
+
+ episodes = self._call_api(
+ program_id, lang, m_type == 'video', False, episode_type == 'clip')
+
+ entries = []
+ for episode in episodes:
+ episode_path = episode.get('url')
+ if not episode_path:
+ continue
+ entries.append(self._extract_episode_info(
+ urljoin(url, episode_path), episode))
+
+ program_title = None
+ if entries:
+ program_title = entries[0].get('series')
+
+ return self.playlist_result(entries, program_id, program_title)
diff --git a/hypervideo_dl/extractor/nhl.py b/hypervideo_dl/extractor/nhl.py
new file mode 100644
index 0000000..eddfe1f
--- /dev/null
+++ b/hypervideo_dl/extractor/nhl.py
@@ -0,0 +1,128 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_iso8601,
+ parse_duration,
+)
+
+
+class NHLBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ site, tmp_id = re.match(self._VALID_URL, url).groups()
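+ # site[:3] shortens e.g. 'wch2016' to 'wch'; MLB content (the base
+ # class is presumably shared with an MLB extractor) carries an
+ # extra 'item/' path segment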
+ video_data = self._download_json(
+ 'https://%s/%s/%sid/v1/%s/details/web-v1.json'
+ % (self._CONTENT_DOMAIN, site[:3], 'item/' if site == 'mlb' else '', tmp_id), tmp_id)
+ if video_data.get('type') != 'video':
+ video_data = video_data['media']
+ video = video_data.get('video')
+ if video:
+ video_data = video
+ else:
+ videos = video_data.get('videos')
+ if videos:
+ video_data = videos[0]
+
+ video_id = compat_str(video_data['id'])
+ title = video_data['title']
+
+ formats = []
+ for playback in video_data.get('playbacks', []):
+ playback_url = playback.get('url')
+ if not playback_url:
+ continue
+ ext = determine_ext(playback_url)
+ if ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ playback_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=playback.get('name', 'hls'), fatal=False)
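+ # some advertised m3u8 URLs are invalid (see the test case below),
+ # so probe them before use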
+ self._check_formats(m3u8_formats, video_id)
+ formats.extend(m3u8_formats)
+ else:
+ height = int_or_none(playback.get('height'))
+ formats.append({
+ 'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
+ 'url': playback_url,
+ 'width': int_or_none(playback.get('width')),
+ 'height': height,
+ 'tbr': int_or_none(self._search_regex(r'_(\d+)[kK]', playback_url, 'bitrate', default=None)),
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ cuts = video_data.get('image', {}).get('cuts') or []
+ if isinstance(cuts, dict):
+ cuts = cuts.values()
+ for thumbnail_data in cuts:
+ thumbnail_url = thumbnail_data.get('src')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail_data.get('width')),
+ 'height': int_or_none(thumbnail_data.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'timestamp': parse_iso8601(video_data.get('date')),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
+
+
+class NHLIE(NHLBaseIE):
+ IE_NAME = 'nhl.com'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)'
+ _CONTENT_DOMAIN = 'nhl.bamcontent.com'
+ _TESTS = [{
+ # type=video
+ 'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
+ 'md5': '0f7b9a8f986fb4b4eeeece9a56416eaf',
+ 'info_dict': {
+ 'id': '43663503',
+ 'ext': 'mp4',
+ 'title': 'Anisimov cleans up mess',
+ 'description': 'md5:a02354acdfe900e940ce40706939ca63',
+ 'timestamp': 1461288600,
+ 'upload_date': '20160422',
+ },
+ }, {
+ # type=article
+ 'url': 'https://www.nhl.com/news/dennis-wideman-suspended/c-278258934',
+ 'md5': '1f39f4ea74c1394dea110699a25b366c',
+ 'info_dict': {
+ 'id': '40784403',
+ 'ext': 'mp4',
+ 'title': 'Wideman suspended by NHL',
+ 'description': 'Flames defenseman Dennis Wideman was banned 20 games for violation of Rule 40 (Physical Abuse of Officials)',
+ 'upload_date': '20160204',
+ 'timestamp': 1454544904,
+ },
+ }, {
+ # Some m3u8 URLs are invalid (https://github.com/ytdl-org/youtube-dl/issues/10713)
+ 'url': 'https://www.nhl.com/predators/video/poile-laviolette-on-subban-trade/t-277437416/c-44315003',
+ 'md5': '50b2bb47f405121484dda3ccbea25459',
+ 'info_dict': {
+ 'id': '44315003',
+ 'ext': 'mp4',
+ 'title': 'Poile, Laviolette on Subban trade',
+ 'description': 'General manager David Poile and head coach Peter Laviolette share their thoughts on acquiring P.K. Subban from Montreal (06/29/16)',
+ 'timestamp': 1467242866,
+ 'upload_date': '20160629',
+ },
+ }, {
+ 'url': 'https://www.wch2016.com/video/caneur-best-of-game-2-micd-up/t-281230378/c-44983703',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068',
+ 'only_matching': True,
+ }]
diff --git a/hypervideo_dl/extractor/nick.py b/hypervideo_dl/extractor/nick.py
new file mode 100644
index 0000000..2e8b302
--- /dev/null
+++ b/hypervideo_dl/extractor/nick.py
@@ -0,0 +1,249 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .mtv import MTVServicesInfoExtractor
+from ..utils import update_url_query
+
+
+class NickIE(MTVServicesInfoExtractor):
+ # It seems that none of the videos on the website are still available
+ IE_NAME = 'nick.com'
+ _VALID_URL = r'https?://(?P<domain>(?:(?:www|beta)\.)?nick(?:jr)?\.com)/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
+ _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
+ _GEO_COUNTRIES = ['US']
+ _TESTS = [{
+ 'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
+ 'playlist': [
+ {
+ 'md5': '6e5adc1e28253bbb1b28ab05403dd4d4',
+ 'info_dict': {
+ 'id': 'be6a17b0-412d-11e5-8ff7-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S1',
+ 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+
+ }
+ },
+ {
+ 'md5': 'd7be441fc53a1d4882fa9508a1e5b3ce',
+ 'info_dict': {
+ 'id': 'be6b8f96-412d-11e5-8ff7-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S2',
+ 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+
+ }
+ },
+ {
+ 'md5': 'efffe1728a234b2b0d2f2b343dd1946f',
+ 'info_dict': {
+ 'id': 'be6cf7e6-412d-11e5-8ff7-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S3',
+ 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+ }
+ },
+ {
+ 'md5': '1ec6690733ab9f41709e274a1d5c7556',
+ 'info_dict': {
+ 'id': 'be6e3354-412d-11e5-8ff7-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S4',
+ 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+ }
+ },
+ ],
+ }, {
+ 'url': 'http://www.nickjr.com/paw-patrol/videos/pups-save-a-goldrush-s3-ep302-full-episode/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://beta.nick.com/nicky-ricky-dicky-and-dawn/videos/nicky-ricky-dicky-dawn-301-full-episode/',
+ 'only_matching': True,
+ }]
+
+ def _get_feed_query(self, uri):
+ return {
+ 'feed': 'nick_arc_player_prime',
+ 'mgid': uri,
+ }
+
+ def _real_extract(self, url):
+ domain, display_id = re.match(self._VALID_URL, url).groups()
+ video_data = self._download_json(
+ 'http://%s/data/video.endLevel.json' % domain,
+ display_id, query={
+ 'urlKey': display_id,
+ })
+ return self._get_videos_info(video_data['player'] + video_data['id'])
+
+
+class NickBrIE(MTVServicesInfoExtractor):
+ IE_NAME = 'nickelodeon:br'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
+ (?:www\.)?nickjr\.[a-z]{2}|
+ (?:www\.)?nickelodeonjunior\.fr
+ )
+ /(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://www.nickjr.com.br/patrulha-canina/videos/210-labirinto-de-pipoca/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mundonick.uol.com.br/programas/the-loud-house/videos/muitas-irmas/7ljo9j',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickjr.nl/paw-patrol/videos/311-ge-wol-dig-om-terug-te-zijn/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeonjunior.fr/paw-patrol-la-pat-patrouille/videos/episode-401-entier-paw-patrol/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, display_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, display_id)
+ uri = self._search_regex(
+ r'data-(?:contenturi|mgid)="([^"]+)', webpage, 'mgid')
+ video_id = self._id_from_uri(uri)
+ config = self._download_json(
+ 'http://media.mtvnservices.com/pmt/e1/access/index.html',
+ video_id, query={
+ 'uri': uri,
+ 'configtype': 'edge',
+ }, headers={
+ 'Referer': url,
+ })
+ info_url = self._remove_template_parameter(config['feedWithQueryParams'])
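+ # some configs carry no feed URL, in which case the template
+ # resolves to the literal string 'None' and the feed URL has to be
+ # built manually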
+ if info_url == 'None':
+ if domain.startswith('www.'):
+ domain = domain[4:]
+ content_domain = {
+ 'mundonick.uol': 'mundonick.com.br',
+ 'nickjr': 'br.nickelodeonjunior.tv',
+ }[domain]
+ query = {
+ 'mgid': uri,
+ 'imageEp': content_domain,
+ 'arcEp': content_domain,
+ }
+ if domain == 'nickjr.com.br':
+ query['ep'] = 'c4b16088'
+ info_url = update_url_query(
+ 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed', query)
+ return self._get_videos_info_from_url(info_url, video_id)
+
+
+class NickDeIE(MTVServicesInfoExtractor):
+ IE_NAME = 'nick.de'
+ _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl|ch)|nickelodeon\.(?:nl|be|at|dk|no|se))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nick.de/shows/342-icarly',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.nl/shows/474-spongebob/videos/17403-een-kijkje-in-de-keuken-met-sandy-van-binnenuit',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.no/program/2626-bulderhuset/videoer/90947-femteklasse-veronica-vs-vanzilla',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.dk/serier/2626-hojs-hus/videoer/761-tissepause',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.se/serier/2626-lugn-i-stormen/videos/998-',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nick.ch/shows/2304-adventure-time-abenteuerzeit-mit-finn-und-jake',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.be/afspeellijst/4530-top-videos/videos/episode/73917-inval-broodschapper-lariekoek-arie',
+ 'only_matching': True,
+ }]
+
+ def _extract_mrss_url(self, webpage, host):
+ return update_url_query(self._search_regex(
+ r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
+ {'siteKey': host})
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ host = mobj.group('host')
+
+ webpage = self._download_webpage(url, video_id)
+
+ mrss_url = self._extract_mrss_url(webpage, host)
+
+ return self._get_videos_info_from_url(mrss_url, video_id)
+
+
+class NickNightIE(NickDeIE):
+ IE_NAME = 'nicknight'
+ _VALID_URL = r'https?://(?:www\.)(?P<host>nicknight\.(?:de|at|tv))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.nicknight.at/shows/977-awkward/videos/85987-nimmer-beste-freunde',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nicknight.at/shows/977-awkward',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nicknight.at/shows/1900-faking-it',
+ 'only_matching': True,
+ }]
+
+ def _extract_mrss_url(self, webpage, *args):
+ return self._search_regex(
+ r'mrss\s*:\s*(["\'])(?P<url>http.+?)\1', webpage,
+ 'mrss url', group='url')
+
+
+class NickRuIE(MTVServicesInfoExtractor):
+ IE_NAME = 'nickelodeonru'
+ _VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu|com\.tr)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.ru/videos/smotri-na-nickelodeon-v-iyule/g9hvh7',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.fr/programmes/bob-l-eponge/videos/le-marathon-de-booh-kini-bottom-mardi-31-octobre/nfn7z0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.es/videos/nickelodeon-consejos-tortitas/f7w7xy',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.pt/series/spongebob-squarepants/videos/a-bolha-de-tinta-gigante/xutq1b',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.ro/emisiuni/shimmer-si-shine/video/nahal-din-bomboane/uw5u2k',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.hu/musorok/spongyabob-kockanadrag/videok/episodes/buborekfujas-az-elszakadt-nadrag/q57iob#playlist/k6te4y',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.com.tr/programlar/sunger-bob/videolar/kayip-yatak/mgqbjy',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ mgid = self._extract_mgid(webpage)
+ return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
diff --git a/hypervideo_dl/extractor/niconico.py b/hypervideo_dl/extractor/niconico.py
new file mode 100644
index 0000000..a85fc3d
--- /dev/null
+++ b/hypervideo_dl/extractor/niconico.py
@@ -0,0 +1,515 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+import functools
+import json
+import math
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ dict_get,
+ ExtractorError,
+ float_or_none,
+ InAdvancePagedList,
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ remove_start,
+ try_get,
+ unified_timestamp,
+ urlencode_postdata,
+ xpath_text,
+)
+
+
+class NiconicoIE(InfoExtractor):
+ IE_NAME = 'niconico'
+ IE_DESC = 'ニコニコ動画'
+
+ _TESTS = [{
+ 'url': 'http://www.nicovideo.jp/watch/sm22312215',
+ 'md5': 'd1a75c0823e2f629128c43e1212760f9',
+ 'info_dict': {
+ 'id': 'sm22312215',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny',
+ 'thumbnail': r're:https?://.*',
+ 'uploader': 'takuya0301',
+ 'uploader_id': '2698420',
+ 'upload_date': '20131123',
+ 'timestamp': int, # timestamp is unstable
+ 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
+ 'duration': 33,
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ 'skip': 'Requires an account',
+ }, {
+ # Files downloaded with and without credentials differ, so omit
+ # the md5 field
+ 'url': 'http://www.nicovideo.jp/watch/nm14296458',
+ 'info_dict': {
+ 'id': 'nm14296458',
+ 'ext': 'swf',
+ 'title': '【鏡音リン】Dance on media【オリジナル】take2!',
+ 'description': 'md5:689f066d74610b3b22e0f1739add0f58',
+ 'thumbnail': r're:https?://.*',
+ 'uploader': 'りょうた',
+ 'uploader_id': '18822557',
+ 'upload_date': '20110429',
+ 'timestamp': 1304065916,
+ 'duration': 209,
+ },
+ 'skip': 'Requires an account',
+ }, {
+ # video exists but is marked as "deleted"
+ # md5 is unstable
+ 'url': 'http://www.nicovideo.jp/watch/sm10000',
+ 'info_dict': {
+ 'id': 'sm10000',
+ 'ext': 'unknown_video',
+ 'description': 'deleted',
+ 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
+ 'thumbnail': r're:https?://.*',
+ 'upload_date': '20071224',
+ 'timestamp': int, # timestamp field has different value if logged in
+ 'duration': 304,
+ 'view_count': int,
+ },
+ 'skip': 'Requires an account',
+ }, {
+ 'url': 'http://www.nicovideo.jp/watch/so22543406',
+ 'info_dict': {
+ 'id': '1388129933',
+ 'ext': 'mp4',
+ 'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
+ 'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
+ 'thumbnail': r're:https?://.*',
+ 'timestamp': 1388851200,
+ 'upload_date': '20140104',
+ 'uploader': 'アニメロチャンネル',
+ 'uploader_id': '312',
+ },
+ 'skip': 'The viewing period of the video you were searching for has expired.',
+ }, {
+ # video not available via `getflv`; "old" HTML5 video
+ 'url': 'http://www.nicovideo.jp/watch/sm1151009',
+ 'md5': '8fa81c364eb619d4085354eab075598a',
+ 'info_dict': {
+ 'id': 'sm1151009',
+ 'ext': 'mp4',
+ 'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)',
+ 'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7',
+ 'thumbnail': r're:https?://.*',
+ 'duration': 184,
+ 'timestamp': 1190868283,
+ 'upload_date': '20070927',
+ 'uploader': 'denden2',
+ 'uploader_id': '1392194',
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ 'skip': 'Requires an account',
+ }, {
+ # "New" HTML5 video
+ # md5 is unstable
+ 'url': 'http://www.nicovideo.jp/watch/sm31464864',
+ 'info_dict': {
+ 'id': 'sm31464864',
+ 'ext': 'mp4',
+ 'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
+ 'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
+ 'timestamp': 1498514060,
+ 'upload_date': '20170626',
+ 'uploader': 'ゲスト',
+ 'uploader_id': '40826363',
+ 'thumbnail': r're:https?://.*',
+ 'duration': 198,
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ 'skip': 'Requires an account',
+ }, {
+ # Video without owner
+ 'url': 'http://www.nicovideo.jp/watch/sm18238488',
+ 'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
+ 'info_dict': {
+ 'id': 'sm18238488',
+ 'ext': 'mp4',
+ 'title': '【実写版】ミュータントタートルズ',
+ 'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
+ 'timestamp': 1341160408,
+ 'upload_date': '20120701',
+ 'uploader': None,
+ 'uploader_id': None,
+ 'thumbnail': r're:https?://.*',
+ 'duration': 5271,
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ 'skip': 'Requires an account',
+ }, {
+ 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
+ 'only_matching': True,
+ }]
+
+ _VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
+ _NETRC_MACHINE = 'niconico'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ # No authentication to be performed
+ if not username:
+ return True
+
+ # Log in
+ login_ok = True
+ login_form_strs = {
+ 'mail_tel': username,
+ 'password': password,
+ }
+ urlh = self._request_webpage(
+ 'https://account.nicovideo.jp/api/v1/login', None,
+ note='Logging in', errnote='Unable to log in',
+ data=urlencode_postdata(login_form_strs))
+ if urlh is False:
+ login_ok = False
+ else:
+ parts = compat_urllib_parse_urlparse(urlh.geturl())
+ if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
+ login_ok = False
+ if not login_ok:
+ self._downloader.report_warning('unable to log in: bad username or password')
+ return login_ok
+
+ def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
+ def yesno(boolean):
+ return 'yes' if boolean else 'no'
+
+ session_api_data = api_data['video']['dmcInfo']['session_api']
+ session_api_endpoint = session_api_data['urls'][0]
+
+ format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
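+ # yields ids along the lines of 'h264_360p-aac_64kbps' (illustrative
+ # only; the actual id strings come from the API)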
+
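+ # open a DMC (Dwango Media Cluster) session; the response carries a
+ # one-time content_uri for the requested quality combination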
+ session_response = self._download_json(
+ session_api_endpoint['url'], video_id,
+ query={'_format': 'json'},
+ headers={'Content-Type': 'application/json'},
+ note='Downloading JSON metadata for %s' % format_id,
+ data=json.dumps({
+ 'session': {
+ 'client_info': {
+ 'player_id': session_api_data['player_id'],
+ },
+ 'content_auth': {
+ 'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]],
+ 'content_key_timeout': session_api_data['content_key_timeout'],
+ 'service_id': 'nicovideo',
+ 'service_user_id': session_api_data['service_user_id']
+ },
+ 'content_id': session_api_data['content_id'],
+ 'content_src_id_sets': [{
+ 'content_src_ids': [{
+ 'src_id_to_mux': {
+ 'audio_src_ids': [audio_quality['id']],
+ 'video_src_ids': [video_quality['id']],
+ }
+ }]
+ }],
+ 'content_type': 'movie',
+ 'content_uri': '',
+ 'keep_method': {
+ 'heartbeat': {
+ 'lifetime': session_api_data['heartbeat_lifetime']
+ }
+ },
+ 'priority': session_api_data['priority'],
+ 'protocol': {
+ 'name': 'http',
+ 'parameters': {
+ 'http_parameters': {
+ 'parameters': {
+ 'http_output_download_parameters': {
+ 'use_ssl': yesno(session_api_endpoint['is_ssl']),
+ 'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
+ }
+ }
+ }
+ }
+ },
+ 'recipe_id': session_api_data['recipe_id'],
+ 'session_operation_auth': {
+ 'session_operation_auth_by_signature': {
+ 'signature': session_api_data['signature'],
+ 'token': session_api_data['token'],
+ }
+ },
+ 'timing_constraint': 'unlimited'
+ }
+ }).encode())
+
+ resolution = video_quality.get('resolution', {})
+
+ return {
+ 'url': session_response['data']['session']['content_uri'],
+ 'format_id': format_id,
+ 'ext': 'mp4', # the session API is used by the HTML5 player, which always serves mp4
+ 'abr': float_or_none(audio_quality.get('bitrate'), 1000),
+ 'vbr': float_or_none(video_quality.get('bitrate'), 1000),
+ 'height': resolution.get('height'),
+ 'width': resolution.get('width'),
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ # Get video webpage. We are not actually interested in it for normal
+ # cases, but need the cookies in order to be able to download the
+ # info webpage
+ webpage, handle = self._download_webpage_handle(
+ 'http://www.nicovideo.jp/watch/' + video_id, video_id)
+ if video_id.startswith('so'):
+ video_id = self._match_id(handle.geturl())
+
+ api_data = self._parse_json(self._html_search_regex(
+ 'data-api-data="([^"]+)"', webpage,
+ 'API data', default='{}'), video_id)
+
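+ # smile-server URLs ending in 'low' are the throttled "economy mode"
+ # variants served to free users at peak times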
+ def _format_id_from_url(video_url):
+ return 'economy' if video_url.endswith('low') else 'normal'
+
+ try:
+ video_real_url = api_data['video']['smileInfo']['url']
+ except KeyError: # Flash videos
+ # Get flv info
+ flv_info_webpage = self._download_webpage(
+ 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
+ video_id, 'Downloading flv info')
+
+ flv_info = compat_parse_qs(flv_info_webpage)
+ if 'url' not in flv_info:
+ if 'deleted' in flv_info:
+ raise ExtractorError('The video has been deleted.',
+ expected=True)
+ elif 'closed' in flv_info:
+ raise ExtractorError('Niconico videos now require logging in',
+ expected=True)
+ elif 'error' in flv_info:
+ raise ExtractorError('%s reports error: %s' % (
+ self.IE_NAME, flv_info['error'][0]), expected=True)
+ else:
+ raise ExtractorError('Unable to find video URL')
+
+ video_info_xml = self._download_xml(
+ 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
+ video_id, note='Downloading video info page')
+
+ def get_video_info(items):
+ if not isinstance(items, list):
+ items = [items]
+ for item in items:
+ ret = xpath_text(video_info_xml, './/' + item)
+ if ret:
+ return ret
+
+ video_real_url = flv_info['url'][0]
+
+ extension = get_video_info('movie_type')
+ if not extension:
+ extension = determine_ext(video_real_url)
+
+ formats = [{
+ 'url': video_real_url,
+ 'ext': extension,
+ 'format_id': _format_id_from_url(video_real_url),
+ }]
+ else:
+ formats = []
+
+ dmc_info = api_data['video'].get('dmcInfo')
+ if dmc_info: # "New" HTML5 videos
+ quality_info = dmc_info['quality']
+ for audio_quality in quality_info['audios']:
+ for video_quality in quality_info['videos']:
+ if not audio_quality['available'] or not video_quality['available']:
+ continue
+ formats.append(self._extract_format_for_quality(
+ api_data, video_id, audio_quality, video_quality))
+
+ self._sort_formats(formats)
+ else: # "Old" HTML5 videos
+ formats = [{
+ 'url': video_real_url,
+ 'ext': 'mp4',
+ 'format_id': _format_id_from_url(video_real_url),
+ }]
+
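+ # re-bind get_video_info: from here on metadata comes from the
+ # watch-page API data (the XML-based variant above only applies to
+ # the legacy Flash code path)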
+ def get_video_info(items):
+ return dict_get(api_data['video'], items)
+
+ # Start extracting information
+ title = get_video_info('title')
+ if not title:
+ title = self._og_search_title(webpage, default=None)
+ if not title:
+ title = self._html_search_regex(
+ r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
+ webpage, 'video title')
+
+ watch_api_data_string = self._html_search_regex(
+ r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
+ webpage, 'watch api data', default=None)
+ watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {}
+ video_detail = watch_api_data.get('videoDetail', {})
+
+ thumbnail = (
+ get_video_info(['thumbnail_url', 'thumbnailURL'])
+ or self._html_search_meta('image', webpage, 'thumbnail', default=None)
+ or video_detail.get('thumbnail'))
+
+ description = get_video_info('description')
+
+ timestamp = (parse_iso8601(get_video_info('first_retrieve'))
+ or unified_timestamp(get_video_info('postedDateTime')))
+ if not timestamp:
+ match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
+ if match:
+ timestamp = parse_iso8601(match.replace('+', ':00+'))
+ if not timestamp and video_detail.get('postedAt'):
+ timestamp = parse_iso8601(
+ video_detail['postedAt'].replace('/', '-'),
+ delimiter=' ', timezone=datetime.timedelta(hours=9))
+
+ view_count = int_or_none(get_video_info(['view_counter', 'viewCount']))
+ if not view_count:
+ match = self._html_search_regex(
+ r'>Views: <strong[^>]*>([^<]+)</strong>',
+ webpage, 'view count', default=None)
+ if match:
+ view_count = int_or_none(match.replace(',', ''))
+ view_count = view_count or video_detail.get('viewCount')
+
+ comment_count = (int_or_none(get_video_info('comment_num'))
+ or video_detail.get('commentCount')
+ or try_get(api_data, lambda x: x['thread']['commentCount']))
+ if not comment_count:
+ match = self._html_search_regex(
+ r'>Comments: <strong[^>]*>([^<]+)</strong>',
+ webpage, 'comment count', default=None)
+ if match:
+ comment_count = int_or_none(match.replace(',', ''))
+
+ duration = (parse_duration(
+ get_video_info('length')
+ or self._html_search_meta(
+ 'video:duration', webpage, 'video duration', default=None))
+ or video_detail.get('length')
+ or get_video_info('duration'))
+
+ webpage_url = get_video_info('watch_url') or url
+
+ # Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
+ # in the JSON, which will cause None to be returned instead of {}.
+ owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
+ uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
+ uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ 'uploader': uploader,
+ 'timestamp': timestamp,
+ 'uploader_id': uploader_id,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'duration': duration,
+ 'webpage_url': webpage_url,
+ }
+
+
+class NiconicoPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.nicovideo.jp/mylist/27411728',
+ 'info_dict': {
+ 'id': '27411728',
+ 'title': 'AKB48のオールナイトニッポン',
+ 'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08',
+ 'uploader': 'のっく',
+ 'uploader_id': '805442',
+ },
+ 'playlist_mincount': 225,
+ }, {
+ 'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
+ 'only_matching': True,
+ }]
+ _PAGE_SIZE = 100
+
+ def _call_api(self, list_id, resource, query):
+ return self._download_json(
+ 'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
+ 'Downloading %s JSON metadata' % resource, query=query,
+ headers={'X-Frontend-Id': 6})['data']['mylist']
+
+ def _parse_owner(self, item):
+ owner = item.get('owner') or {}
+ if owner:
+ return {
+ 'uploader': owner.get('name'),
+ 'uploader_id': owner.get('id'),
+ }
+ return {}
+
+ def _fetch_page(self, list_id, page):
+ page += 1
+ items = self._call_api(list_id, 'page %d' % page, {
+ 'page': page,
+ 'pageSize': self._PAGE_SIZE,
+ })['items']
+ for item in items:
+ video = item.get('video') or {}
+ video_id = video.get('id')
+ if not video_id:
+ continue
+ count = video.get('count') or {}
+ get_count = lambda x: int_or_none(count.get(x))
+ info = {
+ '_type': 'url',
+ 'id': video_id,
+ 'title': video.get('title'),
+ 'url': 'https://www.nicovideo.jp/watch/' + video_id,
+ 'description': video.get('shortDescription'),
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': get_count('view'),
+ 'comment_count': get_count('comment'),
+ 'ie_key': NiconicoIE.ie_key(),
+ }
+ info.update(self._parse_owner(video))
+ yield info
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
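+ # only the list metadata is needed up front (pageSize=1); the items
+ # themselves are then paged lazily in chunks of _PAGE_SIZE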
+ mylist = self._call_api(list_id, 'list', {
+ 'pageSize': 1,
+ })
+ entries = InAdvancePagedList(
+ functools.partial(self._fetch_page, list_id),
+ math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
+ self._PAGE_SIZE)
+ result = self.playlist_result(
+ entries, list_id, mylist.get('name'), mylist.get('description'))
+ result.update(self._parse_owner(mylist))
+ return result
diff --git a/hypervideo_dl/extractor/ninecninemedia.py b/hypervideo_dl/extractor/ninecninemedia.py
new file mode 100644
index 0000000..cfc2203
--- /dev/null
+++ b/hypervideo_dl/extractor/ninecninemedia.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class NineCNineMediaIE(InfoExtractor):
+ IE_NAME = '9c9media'
+ _GEO_COUNTRIES = ['CA']
+ _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
+ _API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
+
+ def _real_extract(self, url):
+ destination_code, content_id = re.match(self._VALID_URL, url).groups()
+ api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
+ content = self._download_json(api_base_url, content_id, query={
+ '$include': '[Media.Name,Season,ContentPackages.Duration,ContentPackages.Id]',
+ })
+ title = content['Name']
+ content_package = content['ContentPackages'][0]
+ package_id = content_package['Id']
+ content_package_url = api_base_url + 'contentpackages/%s/' % package_id
+ content_package = self._download_json(
+ content_package_url, content_id, query={
+ '$include': '[HasClosedCaptions]',
+ })
+
+ if try_get(content_package, lambda x: x['Constraints']['Security']['Type']):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
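+ # the same manifest base URL serves the HLS, HDS and DASH variants;
+ # only the extension differs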
+ manifest_base_url = content_package_url + 'manifest.'
+ formats = []
+ formats.extend(self._extract_m3u8_formats(
+ manifest_base_url + 'm3u8', content_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+ formats.extend(self._extract_f4m_formats(
+ manifest_base_url + 'f4m', content_id,
+ f4m_id='hds', fatal=False))
+ formats.extend(self._extract_mpd_formats(
+ manifest_base_url + 'mpd', content_id,
+ mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for image in (content.get('Images') or []):
+ image_url = image.get('Url')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('Width')),
+ 'height': int_or_none(image.get('Height')),
+ })
+
+ tags, categories = [], []
+ for source_name, container in (('Tags', tags), ('Genres', categories)):
+ for e in content.get(source_name, []):
+ e_name = e.get('Name')
+ if not e_name:
+ continue
+ container.append(e_name)
+
+ season = content.get('Season') or {}
+
+ info = {
+ 'id': content_id,
+ 'title': title,
+ 'description': content.get('Desc') or content.get('ShortDesc'),
+ 'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
+ 'episode_number': int_or_none(content.get('Episode')),
+ 'season': season.get('Name'),
+ 'season_number': int_or_none(season.get('Number')),
+ 'season_id': season.get('Id'),
+ 'series': try_get(content, lambda x: x['Media']['Name']),
+ 'tags': tags,
+ 'categories': categories,
+ 'duration': float_or_none(content_package.get('Duration')),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
+
+ if content_package.get('HasClosedCaptions'):
+ info['subtitles'] = {
+ 'en': [{
+ 'url': manifest_base_url + 'vtt',
+ 'ext': 'vtt',
+ }, {
+ 'url': manifest_base_url + 'srt',
+ 'ext': 'srt',
+ }]
+ }
+
+ return info
diff --git a/hypervideo_dl/extractor/ninegag.py b/hypervideo_dl/extractor/ninegag.py
new file mode 100644
index 0000000..1439082
--- /dev/null
+++ b/hypervideo_dl/extractor/ninegag.py
@@ -0,0 +1,130 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ int_or_none,
+ remove_start,
+ try_get,
+ unescapeHTML,
+ url_or_none,
+)
+
+
+class NineGagIE(InfoExtractor):
+ IE_NAME = '9gag'
+ _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
+
+ _TESTS = [{
+ 'url': 'https://9gag.com/gag/ae5Ag7B',
+ 'info_dict': {
+ 'id': 'ae5Ag7B',
+ 'ext': 'mp4',
+ 'title': 'Capybara Agility Training',
+ 'upload_date': '20191108',
+ 'timestamp': 1573237208,
+ 'categories': ['Awesome'],
+ 'tags': ['Weimaraner', 'American Pit Bull Terrier'],
+ 'duration': 44,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ # HTML escaped title
+ 'url': 'https://9gag.com/gag/av5nvyb',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ post_id = self._match_id(url)
+ post = self._download_json(
+ 'https://9gag.com/v1/post', post_id, query={
+ 'id': post_id
+ })['data']['post']
+
+ if post.get('type') != 'Animated':
+ raise ExtractorError(
+ 'The given url does not contain a video',
+ expected=True)
+
+ title = unescapeHTML(post['title'])
+
+ duration = None
+ formats = []
+ thumbnails = []
+ for key, image in (post.get('images') or {}).items():
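+ # keys look like 'image700' or 'image460sv'; the part after 'image'
+ # is kept as the format/thumbnail id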
+ image_url = url_or_none(image.get('url'))
+ if not image_url:
+ continue
+ ext = determine_ext(image_url)
+ image_id = remove_start(key, 'image')
+ common = {
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ }
+ if ext in ('jpg', 'png'):
+ webp_url = image.get('webpUrl')
+ if webp_url:
+ t = common.copy()
+ t.update({
+ 'id': image_id + '-webp',
+ 'url': webp_url,
+ })
+ thumbnails.append(t)
+ common.update({
+ 'id': image_id,
+ 'ext': ext,
+ })
+ thumbnails.append(common)
+ elif ext in ('webm', 'mp4'):
+ if not duration:
+ duration = int_or_none(image.get('duration'))
+ common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
+ for vcodec in ('vp8', 'vp9', 'h265'):
+ c_url = image.get(vcodec + 'Url')
+ if not c_url:
+ continue
+ c_f = common.copy()
+ c_f.update({
+ 'format_id': image_id + '-' + vcodec,
+ 'url': c_url,
+ 'vcodec': vcodec,
+ })
+ formats.append(c_f)
+ common.update({
+ 'ext': ext,
+ 'format_id': image_id,
+ })
+ formats.append(common)
+ self._sort_formats(formats)
+
+ section = try_get(post, lambda x: x['postSection']['name'])
+
+ tags = None
+ post_tags = post.get('tags')
+ if post_tags:
+ tags = []
+ for tag in post_tags:
+ tag_key = tag.get('key')
+ if not tag_key:
+ continue
+ tags.append(tag_key)
+
+ get_count = lambda x: int_or_none(post.get(x + 'Count'))
+
+ return {
+ 'id': post_id,
+ 'title': title,
+ 'timestamp': int_or_none(post.get('creationTs')),
+ 'duration': duration,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'like_count': get_count('upVote'),
+ 'dislike_count': get_count('downVote'),
+ 'comment_count': get_count('comments'),
+ 'age_limit': 18 if post.get('nsfw') == 1 else None,
+ 'categories': [section] if section else None,
+ 'tags': tags,
+ }
diff --git a/hypervideo_dl/extractor/ninenow.py b/hypervideo_dl/extractor/ninenow.py
new file mode 100644
index 0000000..6157dc7
--- /dev/null
+++ b/hypervideo_dl/extractor/ninenow.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ smuggle_url,
+)
+
+
+class NineNowIE(InfoExtractor):
+ IE_NAME = '9now.com.au'
+ _VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
+ _GEO_COUNTRIES = ['AU']
+ _TESTS = [{
+ # clip
+ 'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
+ 'md5': '17cf47d63ec9323e562c9957a968b565',
+ 'info_dict': {
+ 'id': '16801',
+ 'ext': 'mp4',
+ 'title': 'St. Kilda\'s Joey Montagna on the potential for a player\'s strike',
+ 'description': 'Is a boycott of the NAB Cup "on the table"?',
+ 'uploader_id': '4460760524001',
+ 'upload_date': '20160713',
+ 'timestamp': 1468421266,
+ },
+ 'skip': 'Only available in Australia',
+ }, {
+ # episode
+ 'url': 'https://www.9now.com.au/afl-footy-show/2016/episode-19',
+ 'only_matching': True,
+ }, {
+ # DRM protected
+ 'url': 'https://www.9now.com.au/andrew-marrs-history-of-the-world/season-1/episode-1',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ page_data = self._parse_json(self._search_regex(
+ r'window\.__data\s*=\s*({.*?});', webpage,
+ 'page data', default='{}'), display_id, fatal=False)
+ if not page_data:
+ page_data = self._parse_json(self._parse_json(self._search_regex(
+ r'window\.__data\s*=\s*JSON\.parse\s*\(\s*(".+?")\s*\)\s*;',
+ webpage, 'page data'), display_id), display_id)
+
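+ # the page data nests the payload under either 'episode' or 'clip';
+ # the for/else raises when neither is present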
+ for kind in ('episode', 'clip'):
+ current_key = page_data.get(kind, {}).get(
+ 'current%sKey' % kind.capitalize())
+ if not current_key:
+ continue
+ cache = page_data.get(kind, {}).get('%sCache' % kind, {})
+ if not cache:
+ continue
+ common_data = (cache.get(current_key) or list(cache.values())[0])[kind]
+ break
+ else:
+ raise ExtractorError('Unable to find video data')
+
+ video_data = common_data['video']
+
+ if video_data.get('drm'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
+ brightcove_id = video_data.get('brightcoveId') or 'ref:' + video_data['referenceId']
+ video_id = compat_str(video_data.get('id') or brightcove_id)
+ title = common_data['name']
+
+ thumbnails = [{
+ 'id': thumbnail_id,
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail_id[1:])
+ } for thumbnail_id, thumbnail_url in common_data.get('image', {}).get('sizes', {}).items()]
+
+ return {
+ '_type': 'url_transparent',
+ 'url': smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ {'geo_countries': self._GEO_COUNTRIES}),
+ 'id': video_id,
+ 'title': title,
+ 'description': common_data.get('description'),
+ 'duration': float_or_none(video_data.get('duration'), 1000),
+ 'thumbnails': thumbnails,
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/hypervideo_dl/extractor/nintendo.py b/hypervideo_dl/extractor/nintendo.py
new file mode 100644
index 0000000..ff8f70b
--- /dev/null
+++ b/hypervideo_dl/extractor/nintendo.py
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+
+
+class NintendoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nintendo\.com/(?:games/detail|nintendo-direct)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.nintendo.com/games/detail/duck-hunt-wii-u/',
+ 'info_dict': {
+ 'id': 'MzMmticjp0VPzO3CCj4rmFOuohEuEWoW',
+ 'ext': 'flv',
+ 'title': 'Duck Hunt Wii U VC NES - Trailer',
+ 'duration': 60.326,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Ooyala'],
+ }, {
+ 'url': 'http://www.nintendo.com/games/detail/tokyo-mirage-sessions-fe-wii-u',
+ 'info_dict': {
+ 'id': 'tokyo-mirage-sessions-fe-wii-u',
+ 'title': 'Tokyo Mirage Sessions ♯FE',
+ },
+ 'playlist_count': 4,
+ }, {
+ 'url': 'https://www.nintendo.com/nintendo-direct/09-04-2019/',
+ 'info_dict': {
+ 'id': 'J2bXdmaTE6fe3dWJTPcc7m23FNbc_A1V',
+ 'ext': 'mp4',
+ 'title': 'Switch_ROS_ND0904-H264.mov',
+ 'duration': 2324.758,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Ooyala'],
+ }]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, page_id)
+
+ entries = [
+ OoyalaIE._build_url_result(m.group('code'))
+ for m in re.finditer(
+ r'data-(?:video-id|directVideoId)=(["\'])(?P<code>(?:(?!\1).)+)\1', webpage)]
+
+ title = self._html_search_regex(
+ r'(?s)<(?:span|div)[^>]+class="(?:title|wrapper)"[^>]*>.*?<h1>(.+?)</h1>',
+ webpage, 'title', fatal=False)
+
+ return self.playlist_result(
+ entries, page_id, title)
diff --git a/hypervideo_dl/extractor/njpwworld.py b/hypervideo_dl/extractor/njpwworld.py
new file mode 100644
index 0000000..3639d14
--- /dev/null
+++ b/hypervideo_dl/extractor/njpwworld.py
@@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ get_element_by_class,
+ urlencode_postdata,
+)
+
+
+class NJPWWorldIE(InfoExtractor):
+ _VALID_URL = r'https?://(front\.)?njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
+ IE_DESC = '新日本プロレスワールド'
+ _NETRC_MACHINE = 'njpwworld'
+
+ _TESTS = [{
+ 'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
+ 'info_dict': {
+ 'id': 's_series_00155_1_9',
+ 'ext': 'mp4',
+ 'title': '闘強導夢2000 2000年1月4日 東京ドーム 第9試合 ランディ・サベージ VS リック・スタイナー',
+ 'tags': list,
+ },
+ 'params': {
+ 'skip_download': True, # AES-encrypted m3u8
+ },
+ 'skip': 'Requires login',
+ }, {
+ 'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
+ 'info_dict': {
+ 'id': 's_series_00563_16_bs',
+ 'ext': 'mp4',
+ 'title': 'WORLD TAG LEAGUE 2020 & BEST OF THE SUPER Jr.27 2020年12月6日 福岡・福岡国際センター バックステージコメント(字幕あり)',
+ 'tags': ["福岡・福岡国際センター", "バックステージコメント", "2020", "20年代"],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ _LOGIN_URL = 'https://front.njpwworld.com/auth/login'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ # No authentication to be performed
+ if not username:
+ return True
+
+ # Set up the session (this sets the necessary cookies)
+ self._request_webpage(
+ 'https://njpwworld.com/', None, note='Setting up session')
+
+ webpage, urlh = self._download_webpage_handle(
+ self._LOGIN_URL, None,
+ note='Logging in', errnote='Unable to login',
+ data=urlencode_postdata({'login_id': username, 'pw': password}),
+ headers={'Referer': 'https://front.njpwworld.com/auth'})
+ # a successful login is answered with a 302 redirect away from /auth/login
+ if urlh.geturl() == self._LOGIN_URL:
+ self.report_warning('unable to login')
+ return False
+
+ return True
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ formats = []
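+ # the page JS assigns one video id per quality level ('high'/'low');
+ # scrape every branch of that conditional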
+ for kind, vid in re.findall(r'if\s+\(\s*imageQualityType\s*==\s*\'([^\']+)\'\s*\)\s*{\s*video_id\s*=\s*"(\d+)"', webpage):
+ player_path = '/intent?id=%s&type=url' % vid
+ player_url = compat_urlparse.urljoin(url, player_path)
+ formats.append({
+ 'url': player_url,
+ 'format_id': kind,
+ 'ext': 'mp4',
+ 'protocol': 'm3u8',
+ 'quality': 2 if kind == 'high' else 1,
+ })
+
+ self._sort_formats(formats)
+
+ tag_block = get_element_by_class('tag-block', webpage)
+ tags = re.findall(
+ r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block
+ ) if tag_block else None
+
+ return {
+ 'id': video_id,
+ 'title': get_element_by_class('article-title', webpage) or self._og_search_title(webpage),
+ 'formats': formats,
+ 'tags': tags,
+ }
diff --git a/hypervideo_dl/extractor/nobelprize.py b/hypervideo_dl/extractor/nobelprize.py
new file mode 100644
index 0000000..4dfdb09
--- /dev/null
+++ b/hypervideo_dl/extractor/nobelprize.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ mimetype2ext,
+ determine_ext,
+ update_url_query,
+ get_element_by_attribute,
+ int_or_none,
+)
+
+
+class NobelPrizeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.nobelprize.org/mediaplayer/?id=2636',
+ 'md5': '04c81e5714bb36cc4e2232fee1d8157f',
+ 'info_dict': {
+ 'id': '2636',
+ 'ext': 'mp4',
+ 'title': 'Announcement of the 2016 Nobel Prize in Physics',
+ 'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
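+ # The page inlines `var config = {...}`; its `media` object carries the
+ # title and a `source` list of {src, type} entries (shape inferred from
+ # the lookups below). js_to_json makes the JS literal parseable as JSON.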
+ media = self._parse_json(self._search_regex(
+ r'(?s)var\s*config\s*=\s*({.+?});', webpage,
+ 'config'), video_id, js_to_json)['media']
+ title = media['title']
+
+ formats = []
+ for source in media.get('source', []):
+ source_src = source.get('src')
+ if not source_src:
+ continue
+ ext = mimetype2ext(source.get('type')) or determine_ext(source_src)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_src, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(source_src, {'hdcore': '3.7.0'}),
+ video_id, f4m_id='hds', fatal=False))
+ else:
+ formats.append({
+ 'url': source_src,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': get_element_by_attribute('itemprop', 'description', webpage),
+ 'duration': int_or_none(media.get('duration')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/nonktube.py b/hypervideo_dl/extractor/nonktube.py
new file mode 100644
index 0000000..ca1424e
--- /dev/null
+++ b/hypervideo_dl/extractor/nonktube.py
@@ -0,0 +1,40 @@
+from __future__ import unicode_literals
+
+from .nuevo import NuevoBaseIE
+
+
+class NonkTubeIE(NuevoBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?nonktube\.com/(?:(?:video|embed)/|media/nuevo/embed\.php\?.*?\bid=)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.nonktube.com/video/118636/sensual-wife-uncensored-fucked-in-hairy-pussy-and-facialized',
+ 'info_dict': {
+ 'id': '118636',
+ 'ext': 'mp4',
+ 'title': 'Sensual Wife Uncensored Fucked In Hairy Pussy And Facialized',
+ 'age_limit': 18,
+ 'duration': 1150.98,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.nonktube.com/embed/118636',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(webpage)
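+ # Formats come straight from the page's HTML5 <video>/<source> tags;
+ # only id, title and age_limit are filled in on top of them.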
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'age_limit': 18,
+ })
+ return info
diff --git a/hypervideo_dl/extractor/noovo.py b/hypervideo_dl/extractor/noovo.py
new file mode 100644
index 0000000..b40770d
--- /dev/null
+++ b/hypervideo_dl/extractor/noovo.py
@@ -0,0 +1,109 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ smuggle_url,
+ try_get,
+)
+
+
+class NoovoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?noovo\.ca/videos/(?P<id>[^/]+/[^/?#&]+)'
+ _TESTS = [{
+ # clip
+ 'url': 'http://noovo.ca/videos/rpm-plus/chrysler-imperial',
+ 'info_dict': {
+ 'id': '5386045029001',
+ 'ext': 'mp4',
+ 'title': 'Chrysler Imperial',
+ 'description': 'md5:de3c898d1eb810f3e6243e08c8b4a056',
+ 'timestamp': 1491399228,
+ 'upload_date': '20170405',
+ 'uploader_id': '618566855001',
+ 'series': 'RPM+',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # episode
+ 'url': 'http://noovo.ca/videos/l-amour-est-dans-le-pre/episode-13-8',
+ 'info_dict': {
+ 'id': '5395865725001',
+ 'title': 'Épisode 13 : Les retrouvailles',
+ 'description': 'md5:888c3330f0c1b4476c5bc99a1c040473',
+ 'ext': 'mp4',
+ 'timestamp': 1492019320,
+ 'upload_date': '20170412',
+ 'uploader_id': '618566855001',
+ 'series': "L'amour est dans le pré",
+ 'season_number': 5,
+ 'episode': 'Épisode 13',
+ 'episode_number': 13,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/618566855001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ brightcove_id = self._search_regex(
+ r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
+
+ data = self._parse_json(
+ self._search_regex(
+ r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data',
+ default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+
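+ # dataLayer payload shape, as inferred from the try_get paths below:
+ #   {'video': {'nom': ...},
+ #    'emission': {'nom': ...,
+ #                 'saison': {'nom': ..., 'numero': ...,
+ #                            'episode': {'nom': ..., 'numero': ...}}}}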
+ title = try_get(
+ data, lambda x: x['video']['nom'],
+ compat_str) or self._html_search_meta(
+ 'dcterms.Title', webpage, 'title', fatal=True)
+
+ description = self._html_search_meta(
+ ('dcterms.Description', 'description'), webpage, 'description')
+
+ series = try_get(
+ data, lambda x: x['emission']['nom']) or self._search_regex(
+ r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)',
+ webpage, 'series', default=None)
+
+ season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {}
+ season = try_get(season_el, lambda x: x['nom'], compat_str)
+ season_number = int_or_none(try_get(season_el, lambda x: x['numero']))
+
+ episode_el = try_get(season_el, lambda x: x['episode'], dict) or {}
+ episode = try_get(episode_el, lambda x: x['nom'], compat_str)
+ episode_number = int_or_none(try_get(episode_el, lambda x: x['numero']))
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': BrightcoveNewIE.ie_key(),
+ 'url': smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ {'geo_countries': ['CA']}),
+ 'id': brightcove_id,
+ 'title': title,
+ 'description': description,
+ 'series': series,
+ 'season': season,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ }
diff --git a/hypervideo_dl/extractor/normalboots.py b/hypervideo_dl/extractor/normalboots.py
new file mode 100644
index 0000000..61fe571
--- /dev/null
+++ b/hypervideo_dl/extractor/normalboots.py
@@ -0,0 +1,54 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+
+from ..utils import (
+ unified_strdate,
+)
+
+
+class NormalbootsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$'
+ _TEST = {
+ 'url': 'http://normalboots.com/video/home-alone-games-jontron/',
+ 'info_dict': {
+ 'id': 'home-alone-games-jontron',
+ 'ext': 'mp4',
+ 'title': 'Home Alone Games - JonTron - NormalBoots',
+ 'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',
+ 'uploader': 'JonTron',
+ 'upload_date': '20140125',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['JWPlatform'],
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_uploader = self._html_search_regex(
+ r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
+ webpage, 'uploader', fatal=False)
+ video_upload_date = unified_strdate(self._html_search_regex(
+ r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
+ webpage, 'date', fatal=False))
+
+ jwplatform_url = JWPlatformIE._extract_url(webpage)
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': jwplatform_url,
+ 'ie_key': JWPlatformIE.ie_key(),
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'uploader': video_uploader,
+ 'upload_date': video_upload_date,
+ }
diff --git a/hypervideo_dl/extractor/nosvideo.py b/hypervideo_dl/extractor/nosvideo.py
new file mode 100644
index 0000000..53c500c
--- /dev/null
+++ b/hypervideo_dl/extractor/nosvideo.py
@@ -0,0 +1,77 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ sanitized_Request,
+ urlencode_postdata,
+ xpath_text,
+ xpath_with_ns,
+)
+
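+# Qualify XPath expressions with the XSPF namespace, e.g.
+# _x('.//xspf:track') -> './/{http://xspf.org/ns/0/}track'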
+_x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'})
+
+
+class NosVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nosvideo\.com/' + \
+ r'(?:embed/|\?v=)(?P<id>[A-Za-z0-9]{12})/?'
+ _PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml'
+ _FILE_DELETED_REGEX = r'<b>File Not Found</b>'
+ _TEST = {
+ 'url': 'http://nosvideo.com/?v=mu8fle7g7rpq',
+ 'md5': '6124ed47130d8be3eacae635b071e6b6',
+ 'info_dict': {
+ 'id': 'mu8fle7g7rpq',
+ 'ext': 'mp4',
+ 'title': 'big_buck_bunny_480p_surround-fix.avi.mp4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ fields = {
+ 'id': video_id,
+ 'op': 'download1',
+ 'method_free': 'Continue to Video',
+ }
+ req = sanitized_Request(url, urlencode_postdata(fields))
+ req.add_header('Content-type', 'application/x-www-form-urlencoded')
+ webpage = self._download_webpage(req, video_id,
+ 'Downloading download page')
+ if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
+ raise ExtractorError('Video %s does not exist' % video_id,
+ expected=True)
+
+ xml_id = self._search_regex(r'php\|([^\|]+)\|', webpage, 'XML ID')
+ playlist_url = self._PLAYLIST_URL.format(xml_id=xml_id)
+ playlist = self._download_xml(playlist_url, video_id)
+
+ track = playlist.find(_x('.//xspf:track'))
+ if track is None:
+ raise ExtractorError(
+ 'XML playlist is missing the \'track\' element',
+ expected=True)
+ title = xpath_text(track, _x('./xspf:title'), 'title')
+ url = xpath_text(track, _x('./xspf:file'), 'URL', fatal=True)
+ thumbnail = xpath_text(track, _x('./xspf:image'), 'thumbnail')
+ if title is not None:
+ title = title.strip()
+
+ formats = [{
+ 'format_id': 'sd',
+ 'url': url,
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/nova.py b/hypervideo_dl/extractor/nova.py
new file mode 100644
index 0000000..47b9748
--- /dev/null
+++ b/hypervideo_dl/extractor/nova.py
@@ -0,0 +1,310 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ int_or_none,
+ js_to_json,
+ qualities,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class NovaEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
+ 'md5': 'ee009bafcc794541570edd44b71cbea3',
+ 'info_dict': {
+ 'id': '8o0n0r',
+ 'ext': 'mp4',
+ 'title': '2180. díl',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2578,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ duration = None
+ formats = []
+
+ player = self._parse_json(
+ self._search_regex(
+ r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;',
+ webpage, 'player', default='{}'), video_id, fatal=False)
+ if player:
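+ # player['tracks'] maps a format id (e.g. 'HLS', 'DASH') to either a
+ # single {src, type} dict or a list of them (shape inferred from the
+ # isinstance handling below).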
+ for format_id, format_list in player['tracks'].items():
+ if not isinstance(format_list, list):
+ format_list = [format_list]
+ for format_dict in format_list:
+ if not isinstance(format_dict, dict):
+ continue
+ format_url = url_or_none(format_dict.get('src'))
+ format_type = format_dict.get('type')
+ ext = determine_ext(format_url)
+ if (format_type == 'application/x-mpegURL'
+ or format_id == 'HLS' or ext == 'm3u8'):
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ elif (format_type == 'application/dash+xml'
+ or format_id == 'DASH' or ext == 'mpd'):
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ })
+ duration = int_or_none(player.get('duration'))
+ else:
+ # Legacy code path, no longer in use as of 08.04.2020
+ bitrates = self._parse_json(
+ self._search_regex(
+ r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
+ video_id, transform_source=js_to_json)
+
+ QUALITIES = ('lq', 'mq', 'hq', 'hd')
+ quality_key = qualities(QUALITIES)
+
+ for format_id, format_list in bitrates.items():
+ if not isinstance(format_list, list):
+ format_list = [format_list]
+ for format_url in format_list:
+ format_url = url_or_none(format_url)
+ if not format_url:
+ continue
+ if format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ continue
+ f = {
+ 'url': format_url,
+ }
+ f_id = format_id
+ for quality in QUALITIES:
+ if '%s.mp4' % quality in format_url:
+ f_id += '-%s' % quality
+ f.update({
+ 'quality': quality_key(quality),
+ 'format_note': quality.upper(),
+ })
+ break
+ f['format_id'] = f_id
+ formats.append(f)
+
+ self._sort_formats(formats)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._search_regex(
+ (r'<value>(?P<title>[^<]+)',
+ r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
+ 'title', group='value')
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._search_regex(
+ r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+ 'thumbnail', fatal=False, group='value')
+ duration = int_or_none(self._search_regex(
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration',
+ default=duration))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class NovaIE(InfoExtractor):
+ IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
+ _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
+ _TESTS = [{
+ 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
+ 'md5': '249baab7d0104e186e78b0899c7d5f28',
+ 'info_dict': {
+ 'id': '1757139',
+ 'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
+ 'ext': 'mp4',
+ 'title': 'Podzemní nemocnice v pražské Krči',
+ 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
+ 'thumbnail': r're:^https?://.*\.(?:jpg)',
+ }
+ }, {
+ 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
+ 'info_dict': {
+ 'id': '1753621',
+ 'ext': 'mp4',
+ 'title': 'Zaklínač 3: Divoký hon',
+ 'description': 're:.*Pokud se stejně jako my nemůžete.*',
+ 'thumbnail': r're:https?://.*\.jpg(\?.*)?',
+ 'upload_date': '20150521',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'gone',
+ }, {
+ # media.cms.nova.cz embed
+ 'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
+ 'info_dict': {
+ 'id': '8o0n0r',
+ 'ext': 'mp4',
+ 'title': '2180. díl',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2578,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [NovaEmbedIE.ie_key()],
+ 'skip': 'CHYBA 404: STRÁNKA NENALEZENA',
+ }, {
+ 'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://doma.nova.cz/clanek/zdravi/prijdte-se-zapsat-do-registru-kostni-drene-jiz-ve-stredu-3-cervna.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://prask.nova.cz/clanek/novinky/co-si-na-sobe-nase-hvezdy-nechaly-pojistit.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tv.nova.cz/clanek/novinky/zivot-je-zivot-bondovsky-trailer.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('id')
+ site = mobj.group('site')
+
+ webpage = self._download_webpage(url, display_id)
+
+ description = clean_html(self._og_search_description(webpage, default=None))
+ if site == 'novaplus':
+ upload_date = unified_strdate(self._search_regex(
+ r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None))
+ elif site == 'fanda':
+ upload_date = unified_strdate(self._search_regex(
+ r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None))
+ else:
+ upload_date = None
+
+ # novaplus
+ embed_id = self._search_regex(
+ r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
+ webpage, 'embed url', default=None)
+ if embed_id:
+ return {
+ '_type': 'url_transparent',
+ 'url': 'https://media.cms.nova.cz/embed/%s' % embed_id,
+ 'ie_key': NovaEmbedIE.ie_key(),
+ 'id': embed_id,
+ 'description': description,
+ 'upload_date': upload_date
+ }
+
+ video_id = self._search_regex(
+ [r"(?:media|video_id)\s*:\s*'(\d+)'",
+ r'media=(\d+)',
+ r'id="article_video_(\d+)"',
+ r'id="player_(\d+)"'],
+ webpage, 'video id')
+
+ config_url = self._search_regex(
+ r'src="(https?://(?:tn|api)\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
+ webpage, 'config url', default=None)
+ config_params = {}
+
+ if not config_url:
+ player = self._parse_json(
+ self._search_regex(
+ r'(?s)Player\s*\(.+?\s*,\s*({.+?\bmedia\b["\']?\s*:\s*["\']?\d+.+?})\s*\)', webpage,
+ 'player', default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+ if player:
+ config_url = url_or_none(player.get('configUrl'))
+ params = player.get('configParams')
+ if isinstance(params, dict):
+ config_params = params
+
+ if not config_url:
+ DEFAULT_SITE_ID = '23000'
+ SITES = {
+ 'tvnoviny': DEFAULT_SITE_ID,
+ 'novaplus': DEFAULT_SITE_ID,
+ 'vymena': DEFAULT_SITE_ID,
+ 'krasna': DEFAULT_SITE_ID,
+ 'fanda': '30',
+ 'tn': '30',
+ 'doma': '30',
+ }
+
+ site_id = self._search_regex(
+ r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(
+ site, DEFAULT_SITE_ID)
+
+ config_url = 'https://api.nova.cz/bin/player/videojs/config.php'
+ config_params = {
+ 'site': site_id,
+ 'media': video_id,
+ 'quality': 3,
+ 'version': 1,
+ }
+
+ config = self._download_json(
+ config_url, display_id,
+ 'Downloading config JSON', query=config_params,
+ transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
+
+ mediafile = config['mediafile']
+ video_url = mediafile['src']
+
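+ # Split an rtmp(e)://host/app/&playpath URL into the components the
+ # RTMP downloader expects; anything else is passed through as-is.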
+ m = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+?))/&*(?P<playpath>.+)$', video_url)
+ if m:
+ formats = [{
+ 'url': m.group('url'),
+ 'app': m.group('app'),
+ 'play_path': m.group('playpath'),
+ 'player_url': 'http://tvnoviny.nova.cz/static/shared/app/videojs/video-js.swf',
+ 'ext': 'flv',
+ }]
+ else:
+ formats = [{
+ 'url': video_url,
+ }]
+ self._sort_formats(formats)
+
+ title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage)
+ thumbnail = config.get('poster')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/nowness.py b/hypervideo_dl/extractor/nowness.py
new file mode 100644
index 0000000..20ef4cd
--- /dev/null
+++ b/hypervideo_dl/extractor/nowness.py
@@ -0,0 +1,149 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ sanitized_Request,
+)
+
+
+class NownessBaseIE(InfoExtractor):
+ def _extract_url_result(self, post):
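+ # Each media entry names its hosting service in `source`; hand off to
+ # the matching embed extractor (Brightcove, Vimeo or YouTube).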
+ if post['type'] == 'video':
+ for media in post['media']:
+ if media['type'] == 'video':
+ video_id = media['content']
+ source = media['source']
+ if source == 'brightcove':
+ player_code = self._download_webpage(
+ 'http://www.nowness.com/iframe?id=%s' % video_id, video_id,
+ note='Downloading player JavaScript',
+ errnote='Unable to download player JavaScript')
+ bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
+ if bc_url:
+ return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
+ bc_url = BrightcoveNewIE._extract_url(self, player_code)
+ if bc_url:
+ return self.url_result(bc_url, BrightcoveNewIE.ie_key())
+ raise ExtractorError('Could not find player definition')
+ elif source == 'vimeo':
+ return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
+ elif source == 'youtube':
+ return self.url_result(video_id, 'Youtube')
+ elif source == 'cinematique':
+ # hypervideo currently doesn't support cinematique
+ # return self.url_result('http://cinematique.com/embed/%s' % video_id, 'Cinematique')
+ pass
+
+ def _api_request(self, url, request_path):
+ display_id = self._match_id(url)
+ request = sanitized_Request(
+ 'http://api.nowness.com/api/' + request_path % display_id,
+ headers={
+ 'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us',
+ })
+ return display_id, self._download_json(request, display_id)
+
+
+class NownessIE(NownessBaseIE):
+ IE_NAME = 'nowness'
+ _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/(?:story|(?:series|category)/[^/]+)/(?P<id>[^/]+?)(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://www.nowness.com/story/candor-the-art-of-gesticulation',
+ 'md5': '068bc0202558c2e391924cb8cc470676',
+ 'info_dict': {
+ 'id': '2520295746001',
+ 'ext': 'mp4',
+ 'title': 'Candor: The Art of Gesticulation',
+ 'description': 'Candor: The Art of Gesticulation',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1446745676,
+ 'upload_date': '20151105',
+ 'uploader_id': '2385340575001',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }, {
+ 'url': 'https://cn.nowness.com/story/kasper-bjorke-ft-jaakko-eino-kalevi-tnr',
+ 'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
+ 'info_dict': {
+ 'id': '3716354522001',
+ 'ext': 'mp4',
+ 'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
+ 'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1407315371,
+ 'upload_date': '20140806',
+ 'uploader_id': '2385340575001',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }, {
+ # vimeo
+ 'url': 'https://www.nowness.com/series/nowness-picks/jean-luc-godard-supercut',
+ 'md5': '9a5a6a8edf806407e411296ab6bc2a49',
+ 'info_dict': {
+ 'id': '130020913',
+ 'ext': 'mp4',
+ 'title': 'Bleu, Blanc, Rouge - A Godard Supercut',
+ 'description': 'md5:f0ea5f1857dffca02dbd37875d742cec',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20150607',
+ 'uploader': 'Cinema Sem Lei',
+ 'uploader_id': 'cinemasemlei',
+ },
+ 'add_ie': ['Vimeo'],
+ }]
+
+ def _real_extract(self, url):
+ _, post = self._api_request(url, 'post/getBySlug/%s')
+ return self._extract_url_result(post)
+
+
+class NownessPlaylistIE(NownessBaseIE):
+ IE_NAME = 'nowness:playlist'
+ _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/playlist/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.nowness.com/playlist/3286/i-guess-thats-why-they-call-it-the-blues',
+ 'info_dict': {
+ 'id': '3286',
+ },
+ 'playlist_mincount': 8,
+ }
+
+ def _real_extract(self, url):
+ playlist_id, playlist = self._api_request(url, 'post?PlaylistId=%s')
+ entries = [self._extract_url_result(item) for item in playlist['items']]
+ return self.playlist_result(entries, playlist_id)
+
+
+class NownessSeriesIE(NownessBaseIE):
+ IE_NAME = 'nowness:series'
+ _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/series/(?P<id>[^/]+?)(?:$|[?#])'
+ _TEST = {
+ 'url': 'https://www.nowness.com/series/60-seconds',
+ 'info_dict': {
+ 'id': '60',
+ 'title': '60 Seconds',
+ 'description': 'One-minute wisdom in a new NOWNESS series',
+ },
+ 'playlist_mincount': 4,
+ }
+
+ def _real_extract(self, url):
+ display_id, series = self._api_request(url, 'series/getBySlug/%s')
+ entries = [self._extract_url_result(post) for post in series['posts']]
+ series_title = None
+ series_description = None
+ translations = series.get('translations', [])
+ if translations:
+ series_title = translations[0].get('title') or translations[0]['seoTitle']
+ series_description = translations[0].get('seoDescription')
+ return self.playlist_result(
+ entries, compat_str(series['id']), series_title, series_description)
diff --git a/hypervideo_dl/extractor/noz.py b/hypervideo_dl/extractor/noz.py
new file mode 100644
index 0000000..ccafd77
--- /dev/null
+++ b/hypervideo_dl/extractor/noz.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_unquote,
+ compat_xpath,
+)
+from ..utils import (
+ int_or_none,
+ find_xpath_attr,
+ xpath_text,
+ update_url_query,
+)
+
+
+class NozIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?noz\.de/video/(?P<id>[0-9]+)/'
+ _TESTS = [{
+ 'url': 'http://www.noz.de/video/25151/32-Deutschland-gewinnt-Badminton-Lnderspiel-in-Melle',
+ 'info_dict': {
+ 'id': '25151',
+ 'ext': 'mp4',
+ 'duration': 215,
+ 'title': '3:2 - Deutschland gewinnt Badminton-Länderspiel in Melle',
+ 'description': 'Vor rund 370 Zuschauern gewinnt die deutsche Badminton-Nationalmannschaft am Donnerstag ein EM-Vorbereitungsspiel gegen Frankreich in Melle. Video Moritz Frankenberg.',
+ 'thumbnail': r're:^http://.*\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ description = self._og_search_description(webpage)
+
+ edge_url = self._html_search_regex(
+ r'<script\s+(?:type="text/javascript"\s+)?src="(.*?/videojs_.*?)"',
+ webpage, 'edge URL')
+ edge_content = self._download_webpage(edge_url, 'meta configuration')
+
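+ # The Flash-era embed passes the XML playlist location through
+ # so.addVariable("config_url", ...); the value is URL-encoded, hence
+ # the unquote below.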
+ config_url_encoded = self._search_regex(
+ r'so\.addVariable\("config_url","[^,]*,(.*?)"',
+ edge_content, 'config URL'
+ )
+ config_url = compat_urllib_parse_unquote(config_url_encoded)
+
+ doc = self._download_xml(config_url, 'video configuration')
+ title = xpath_text(doc, './/title')
+ thumbnail = xpath_text(doc, './/article/thumbnail/url')
+ duration = int_or_none(xpath_text(
+ doc, './/article/movie/file/duration'))
+ formats = []
+ for qnode in doc.findall(compat_xpath('.//article/movie/file/qualities/qual')):
+ http_url_ele = find_xpath_attr(
+ qnode, './html_urls/video_url', 'format', 'video/mp4')
+ http_url = http_url_ele.text if http_url_ele is not None else None
+ if http_url:
+ formats.append({
+ 'url': http_url,
+ 'format_name': xpath_text(qnode, './name'),
+ 'format_id': '%s-%s' % ('http', xpath_text(qnode, './id')),
+ 'height': int_or_none(xpath_text(qnode, './height')),
+ 'width': int_or_none(xpath_text(qnode, './width')),
+ 'tbr': int_or_none(xpath_text(qnode, './bitrate'), scale=1000),
+ })
+ else:
+ f4m_url = xpath_text(qnode, 'url_hd2')
+ if f4m_url:
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(f4m_url, {'hdcore': '3.4.0'}),
+ video_id, f4m_id='hds', fatal=False))
+ m3u8_url_ele = find_xpath_attr(
+ qnode, './html_urls/video_url',
+ 'format', 'application/vnd.apple.mpegurl')
+ m3u8_url = m3u8_url_ele.text if m3u8_url_ele is not None else None
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'duration': duration,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/npo.py b/hypervideo_dl/extractor/npo.py
new file mode 100644
index 0000000..e525ad9
--- /dev/null
+++ b/hypervideo_dl/extractor/npo.py
@@ -0,0 +1,772 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ fix_xml_ampersands,
+ int_or_none,
+ merge_dicts,
+ orderedSet,
+ parse_duration,
+ qualities,
+ str_or_none,
+ strip_jsonp,
+ unified_strdate,
+ unified_timestamp,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class NPOBaseIE(InfoExtractor):
+ def _get_token(self, video_id):
+ return self._download_json(
+ 'http://ida.omroep.nl/app.php/auth', video_id,
+ note='Downloading token')['token']
+
+
+class NPOIE(NPOBaseIE):
+ IE_NAME = 'npo'
+ IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl'
+ _VALID_URL = r'''(?x)
+ (?:
+ npo:|
+ https?://
+ (?:www\.)?
+ (?:
+ npo\.nl/(?:[^/]+/)*|
+ (?:ntr|npostart)\.nl/(?:[^/]+/){2,}|
+ omroepwnl\.nl/video/fragment/[^/]+__|
+ (?:zapp|npo3)\.nl/(?:[^/]+/){2,}
+ )
+ )
+ (?P<id>[^/?#]+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
+ 'md5': '4b3f9c429157ec4775f2c9cb7b911016',
+ 'info_dict': {
+ 'id': 'VPWON_1220719',
+ 'ext': 'm4v',
+ 'title': 'Nieuwsuur',
+ 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
+ 'upload_date': '20140622',
+ },
+ }, {
+ 'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
+ 'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
+ 'info_dict': {
+ 'id': 'VARA_101191800',
+ 'ext': 'm4v',
+ 'title': 'De Mega Mike & Mega Thomas show: The best of.',
+ 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
+ 'upload_date': '20090227',
+ 'duration': 2400,
+ },
+ }, {
+ 'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289',
+ 'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
+ 'info_dict': {
+ 'id': 'VPWON_1169289',
+ 'ext': 'm4v',
+ 'title': 'Tegenlicht: Zwart geld. De toekomst komt uit Afrika',
+ 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
+ 'upload_date': '20130225',
+ 'duration': 3000,
+ },
+ }, {
+ 'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706',
+ 'info_dict': {
+ 'id': 'WO_VPRO_043706',
+ 'ext': 'm4v',
+ 'title': 'De nieuwe mens - Deel 1',
+ 'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b',
+ 'duration': 4680,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # non asf in streams
+ 'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771',
+ 'info_dict': {
+ 'id': 'WO_NOS_762771',
+ 'ext': 'mp4',
+ 'title': 'Hoe gaat Europa verder na Parijs?',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
+ 'info_dict': {
+ 'id': 'VPWON_1233944',
+ 'ext': 'm4v',
+ 'title': 'Aap, poot, pies',
+ 'description': 'md5:c9c8005d1869ae65b858e82c01a91fde',
+ 'upload_date': '20150508',
+ 'duration': 599,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
+ 'info_dict': {
+ 'id': 'POW_00996502',
+ 'ext': 'm4v',
+ 'title': '''"Dit is wel een 'landslide'..."''',
+ 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
+ 'upload_date': '20150508',
+ 'duration': 462,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # audio
+ 'url': 'http://www.npo.nl/jouw-stad-rotterdam/29-01-2017/RBX_FUNX_6683215/RBX_FUNX_7601437',
+ 'info_dict': {
+ 'id': 'RBX_FUNX_6683215',
+ 'ext': 'mp3',
+ 'title': 'Jouw Stad Rotterdam',
+ 'description': 'md5:db251505244f097717ec59fabc372d9f',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870',
+ 'only_matching': True,
+ }, {
+ # live stream
+ 'url': 'npo:LI_NL1_4188102',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.npostart.nl/broodje-gezond-ei/28-05-2018/KN_1698996',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://npo.nl/KN_1698996',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if any(ie.suitable(url)
+ for ie in (NPOLiveIE, NPORadioIE, NPORadioFragmentIE))
+ else super(NPOIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self._get_info(url, video_id) or self._get_old_info(video_id)
+
+ def _get_info(self, url, video_id):
+ token = self._download_json(
+ 'https://www.npostart.nl/api/token', video_id,
+ 'Downloading token', headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ })['token']
+
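+ # Two tokens are involved: the page token above authorizes the player
+ # JSON request, and the player's own token (player_token below)
+ # authorizes the per-profile stream requests.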
+ player = self._download_json(
+ 'https://www.npostart.nl/player/%s' % video_id, video_id,
+ 'Downloading player JSON', data=urlencode_postdata({
+ 'autoplay': 0,
+ 'share': 1,
+ 'pageUrl': url,
+ 'hasAdConsent': 0,
+ '_token': token,
+ }))
+
+ player_token = player['token']
+
+ drm = False
+ format_urls = set()
+ formats = []
+ for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'):
+ streams = self._download_json(
+ 'https://start-player.npo.nl/video/%s/streams' % video_id,
+ video_id, 'Downloading %s profile JSON' % profile, fatal=False,
+ query={
+ 'profile': profile,
+ 'quality': 'npo',
+ 'tokenId': player_token,
+ 'streamType': 'broadcast',
+ })
+ if not streams:
+ continue
+ stream = streams.get('stream')
+ if not isinstance(stream, dict):
+ continue
+ stream_url = url_or_none(stream.get('src'))
+ if not stream_url or stream_url in format_urls:
+ continue
+ format_urls.add(stream_url)
+ if stream.get('protection') is not None or stream.get('keySystemOptions') is not None:
+ drm = True
+ continue
+ stream_type = stream.get('type')
+ stream_ext = determine_ext(stream_url)
+ if stream_type == 'application/dash+xml' or stream_ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ stream_url, video_id, mpd_id='dash', fatal=False))
+ elif stream_type == 'application/vnd.apple.mpegurl' or stream_ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+ elif re.search(r'\.isml?/Manifest', stream_url):
+ formats.extend(self._extract_ism_formats(
+ stream_url, video_id, ism_id='mss', fatal=False))
+ else:
+ formats.append({
+ 'url': stream_url,
+ })
+
+ if not formats:
+ if drm:
+ raise ExtractorError('This video is DRM protected.', expected=True)
+ return
+
+ self._sort_formats(formats)
+
+ info = {
+ 'id': video_id,
+ 'title': video_id,
+ 'formats': formats,
+ }
+
+ embed_url = url_or_none(player.get('embedUrl'))
+ if embed_url:
+ webpage = self._download_webpage(
+ embed_url, video_id, 'Downloading embed page', fatal=False)
+ if webpage:
+ video = self._parse_json(
+ self._search_regex(
+ r'\bvideo\s*=\s*({.+?})\s*;', webpage, 'video',
+ default='{}'), video_id)
+ if video:
+ title = video.get('episodeTitle')
+ subtitles = {}
+ subtitles_list = video.get('subtitles')
+ if isinstance(subtitles_list, list):
+ for cc in subtitles_list:
+ cc_url = url_or_none(cc.get('src'))
+ if not cc_url:
+ continue
+ lang = str_or_none(cc.get('language')) or 'nl'
+ subtitles.setdefault(lang, []).append({
+ 'url': cc_url,
+ })
+ return merge_dicts({
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': url_or_none(
+ video.get('still_image_url') or video.get('orig_image_url')),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': unified_timestamp(video.get('broadcastDate')),
+ 'creator': video.get('channel'),
+ 'series': video.get('title'),
+ 'episode': title,
+ 'episode_number': int_or_none(video.get('episodeNumber')),
+ 'subtitles': subtitles,
+ }, info)
+
+ return info
+
+ def _get_old_info(self, video_id):
+ metadata = self._download_json(
+ 'http://e.omroep.nl/metadata/%s' % video_id,
+ video_id,
+ # We have to remove the javascript callback
+ transform_source=strip_jsonp,
+ )
+
+ error = metadata.get('error')
+ if error:
+ raise ExtractorError(error, expected=True)
+
+ # For some videos actual video id (prid) is different (e.g. for
+ # http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698
+ # video id is POMS_WNL_853698 but prid is POW_00996502)
+ video_id = metadata.get('prid') or video_id
+
+ # titel is too generic in some cases so utilize aflevering_titel as well
+ # when available (e.g. http://tegenlicht.vpro.nl/afleveringen/2014-2015/access-to-africa.html)
+ title = metadata['titel']
+ sub_title = metadata.get('aflevering_titel')
+ if sub_title and sub_title != title:
+ title += ': %s' % sub_title
+
+ token = self._get_token(video_id)
+
+ formats = []
+ urls = set()
+
+ def is_legal_url(format_url):
+ return format_url and format_url not in urls and re.match(
+ r'^(?:https?:)?//', format_url)
+
+ QUALITY_LABELS = ('Laag', 'Normaal', 'Hoog')
+ QUALITY_FORMATS = ('adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std')
+
+ quality_from_label = qualities(QUALITY_LABELS)
+ quality_from_format_id = qualities(QUALITY_FORMATS)
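+ # qualities() ranks an id by its position in the tuple: later entries
+ # compare higher and unknown ids get -1, so 'Hoog'/'h264_std' win.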
+ items = self._download_json(
+ 'http://ida.omroep.nl/app.php/%s' % video_id, video_id,
+ 'Downloading formats JSON', query={
+ 'adaptive': 'yes',
+ 'token': token,
+ })['items'][0]
+ for num, item in enumerate(items):
+ item_url = item.get('url')
+ if not is_legal_url(item_url):
+ continue
+ urls.add(item_url)
+ format_id = self._search_regex(
+ r'video/ida/([^/]+)', item_url, 'format id',
+ default=None)
+
+ item_label = item.get('label')
+
+ def add_format_url(format_url):
+ width = int_or_none(self._search_regex(
+ r'(\d+)[xX]\d+', format_url, 'width', default=None))
+ height = int_or_none(self._search_regex(
+ r'\d+[xX](\d+)', format_url, 'height', default=None))
+ if item_label in QUALITY_LABELS:
+ quality = quality_from_label(item_label)
+ f_id = item_label
+ elif item_label in QUALITY_FORMATS:
+ quality = quality_from_format_id(format_id)
+ f_id = format_id
+ else:
+ quality, f_id = [None] * 2
+ formats.append({
+ 'url': format_url,
+ 'format_id': f_id,
+ 'width': width,
+ 'height': height,
+ 'quality': quality,
+ })
+
+ # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
+ if item.get('contentType') in ('url', 'audio'):
+ add_format_url(item_url)
+ continue
+
+ try:
+ stream_info = self._download_json(
+ item_url + '&type=json', video_id,
+ 'Downloading %s stream JSON'
+ % (item_label or item.get('format') or format_id or num))
+ except ExtractorError as ee:
+ if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+ error = (self._parse_json(
+ ee.cause.read().decode(), video_id,
+ fatal=False) or {}).get('errorstring')
+ if error:
+ raise ExtractorError(error, expected=True)
+ raise
+ # Stream URL instead of JSON, example: npo:LI_NL1_4188102
+ if isinstance(stream_info, compat_str):
+ if not stream_info.startswith('http'):
+ continue
+ video_url = stream_info
+ # JSON
+ else:
+ video_url = stream_info.get('url')
+ if not video_url or 'vodnotavailable.' in video_url or video_url in urls:
+ continue
+ urls.add(video_url)
+ if determine_ext(video_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+ else:
+ add_format_url(video_url)
+
+ is_live = metadata.get('medium') == 'live'
+
+ if not is_live:
+ for num, stream in enumerate(metadata.get('streams', [])):
+ stream_url = stream.get('url')
+ if not is_legal_url(stream_url):
+ continue
+ urls.add(stream_url)
+ # smooth streaming is not supported
+ stream_type = stream.get('type', '').lower()
+ if stream_type in ['ss', 'ms']:
+ continue
+ if stream_type == 'hds':
+ f4m_formats = self._extract_f4m_formats(
+ stream_url, video_id, fatal=False)
+ # The f4m downloader retrieves only a piece of a live stream
+ for f4m_format in f4m_formats:
+ f4m_format['preference'] = -1
+ formats.extend(f4m_formats)
+ elif stream_type == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, ext='mp4', fatal=False))
+ # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
+ elif '.asf' in stream_url:
+ asx = self._download_xml(
+ stream_url, video_id,
+ 'Downloading stream %d ASX playlist' % num,
+ transform_source=fix_xml_ampersands, fatal=False)
+ if not asx:
+ continue
+ ref = asx.find('./ENTRY/Ref')
+ if ref is None:
+ continue
+ video_url = ref.get('href')
+ if not video_url or video_url in urls:
+ continue
+ urls.add(video_url)
+ formats.append({
+ 'url': video_url,
+ 'ext': stream.get('formaat', 'asf'),
+ 'quality': stream.get('kwaliteit'),
+ 'preference': -10,
+ })
+ else:
+ formats.append({
+ 'url': stream_url,
+ 'quality': stream.get('kwaliteit'),
+ })
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ if metadata.get('tt888') == 'ja':
+ subtitles['nl'] = [{
+ 'ext': 'vtt',
+ 'url': 'http://tt888.omroep.nl/tt888/%s' % video_id,
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': metadata.get('info'),
+ 'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
+ 'upload_date': unified_strdate(metadata.get('gidsdatum')),
+ 'duration': parse_duration(metadata.get('tijdsduur')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ }
+
+
+class NPOLiveIE(NPOBaseIE):
+ IE_NAME = 'npo.nl:live'
+ _VALID_URL = r'https?://(?:www\.)?npo(?:start)?\.nl/live(?:/(?P<id>[^/?#&]+))?'
+
+ _TESTS = [{
+ 'url': 'http://www.npo.nl/live/npo-1',
+ 'info_dict': {
+ 'id': 'LI_NL1_4188102',
+ 'display_id': 'npo-1',
+ 'ext': 'mp4',
+ 'title': 're:^NPO 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.npo.nl/live',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.npostart.nl/live/npo-1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url) or 'npo-1'
+
+ webpage = self._download_webpage(url, display_id)
+
+ live_id = self._search_regex(
+ [r'media-id="([^"]+)"', r'data-prid="([^"]+)"'], webpage, 'live id')
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'npo:%s' % live_id,
+ 'ie_key': NPOIE.ie_key(),
+ 'id': live_id,
+ 'display_id': display_id,
+ }
+
+
+class NPORadioIE(InfoExtractor):
+ IE_NAME = 'npo.nl:radio'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://www.npo.nl/radio/radio-1',
+ 'info_dict': {
+ 'id': 'radio-1',
+ 'ext': 'mp3',
+ 'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if NPORadioFragmentIE.suitable(url) else super(NPORadioIE, cls).suitable(url)
+
+ @staticmethod
+ def _html_get_attribute_regex(attribute):
+ return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ self._html_get_attribute_regex('data-channel'), webpage, 'title')
+
+ stream = self._parse_json(
+ self._html_search_regex(self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'),
+ video_id)
+
+ codec = stream.get('codec')
+
+ return {
+ 'id': video_id,
+ 'url': stream['url'],
+ 'title': self._live_title(title),
+ 'acodec': codec,
+ 'ext': codec,
+ 'is_live': True,
+ }
+
+
+class NPORadioFragmentIE(InfoExtractor):
+ IE_NAME = 'npo.nl:radio:fragment'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
+ 'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
+ 'info_dict': {
+ 'id': '174356',
+ 'ext': 'mp3',
+ 'title': 'Jubileumconcert Willeke Alberti',
+ },
+ }
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, audio_id)
+
+ title = self._html_search_regex(
+ r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % audio_id,
+ webpage, 'title')
+
+ audio_url = self._search_regex(
+ r"data-streams='([^']+)'", webpage, 'audio url')
+
+ return {
+ 'id': audio_id,
+ 'url': audio_url,
+ 'title': title,
+ }
+
+
+class NPODataMidEmbedIE(InfoExtractor):
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id')
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'NPO',
+ 'url': 'npo:%s' % video_id,
+ 'display_id': display_id
+ }
+
+
+class SchoolTVIE(NPODataMidEmbedIE):
+ IE_NAME = 'schooltv'
+ _VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P<id>[^/?#&]+)'
+
+ _TEST = {
+ 'url': 'http://www.schooltv.nl/video/ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam/',
+ 'info_dict': {
+ 'id': 'WO_NTR_429477',
+ 'display_id': 'ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam',
+ 'title': 'Ademhaling: De hele dag haal je adem. Maar wat gebeurt er dan eigenlijk in je lichaam?',
+ 'ext': 'mp4',
+ 'description': 'md5:abfa0ff690adb73fd0297fd033aaa631'
+ },
+ 'params': {
+ # Skip because of m3u8 download
+ 'skip_download': True
+ }
+ }
+
+
+class HetKlokhuisIE(NPODataMidEmbedIE):
+ IE_NAME = 'hetklokhuis'
+ _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
+
+ _TEST = {
+ 'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven',
+ 'info_dict': {
+ 'id': 'VPWON_1260528',
+ 'display_id': 'Zwaartekrachtsgolven',
+ 'ext': 'm4v',
+ 'title': 'Het Klokhuis: Zwaartekrachtsgolven',
+ 'description': 'md5:c94f31fb930d76c2efa4a4a71651dd48',
+ 'upload_date': '20170223',
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }
+
+
+class NPOPlaylistBaseIE(NPOIE):
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
+ for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
+ ]
+
+ playlist_title = self._html_search_regex(
+ self._PLAYLIST_TITLE_RE, webpage, 'playlist title',
+ default=None) or self._og_search_title(webpage)
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
+
+
+class VPROIE(NPOPlaylistBaseIE):
+ IE_NAME = 'vpro'
+ _VALID_URL = r'https?://(?:www\.)?(?:(?:tegenlicht\.)?vpro|2doc)\.nl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+ _PLAYLIST_TITLE_RE = (r'<h1[^>]+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)',
+ r'<h5[^>]+class=["\'].*?\bmedia-platform-subtitle\b.*?["\'][^>]*>([^<]+)')
+ _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"'
+
+ _TESTS = [
+ {
+ 'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html',
+ 'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
+ 'info_dict': {
+ 'id': 'VPWON_1169289',
+ 'ext': 'm4v',
+ 'title': 'De toekomst komt uit Afrika',
+ 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
+ 'upload_date': '20130225',
+ },
+ 'skip': 'Video gone',
+ },
+ {
+ 'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html',
+ 'info_dict': {
+ 'id': 'sergio-herman',
+ 'title': 'sergio herman: fucking perfect',
+ },
+ 'playlist_count': 2,
+ },
+ {
+ # playlist with youtube embed
+ 'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html',
+ 'info_dict': {
+ 'id': 'education-education',
+ 'title': 'education education',
+ },
+ 'playlist_count': 2,
+ },
+ {
+ 'url': 'http://www.2doc.nl/documentaires/series/2doc/2015/oktober/de-tegenprestatie.html',
+ 'info_dict': {
+ 'id': 'de-tegenprestatie',
+ 'title': 'De Tegenprestatie',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.2doc.nl/speel~VARA_101375237~mh17-het-verdriet-van-nederland~.html',
+ 'info_dict': {
+ 'id': 'VARA_101375237',
+ 'ext': 'm4v',
+ 'title': 'MH17: Het verdriet van Nederland',
+ 'description': 'md5:09e1a37c1fdb144621e22479691a9f18',
+ 'upload_date': '20150716',
+ },
+ 'params': {
+ # Skip because of m3u8 download
+ 'skip_download': True
+ },
+ }
+ ]
+
+
+class WNLIE(NPOPlaylistBaseIE):
+ IE_NAME = 'wnl'
+ _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+'
+ _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>'
+ _PLAYLIST_ENTRY_RE = r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+'
+
+ _TESTS = [{
+ 'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
+ 'info_dict': {
+ 'id': 'vandaag-de-dag-6-mei',
+ 'title': 'Vandaag de Dag 6 mei',
+ },
+ 'playlist_count': 4,
+ }]
+
+
+class AndereTijdenIE(NPOPlaylistBaseIE):
+ IE_NAME = 'anderetijden'
+ _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P<id>[^/?#&]+)'
+ _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)</h1>'
+ _PLAYLIST_ENTRY_RE = r'<figure[^>]+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']'
+
+ _TESTS = [{
+ 'url': 'http://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem',
+ 'info_dict': {
+ 'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
+ 'title': 'Duitse soldaten over de Slag bij Arnhem',
+ },
+ 'playlist_count': 3,
+ }]
diff --git a/hypervideo_dl/extractor/npr.py b/hypervideo_dl/extractor/npr.py
new file mode 100644
index 0000000..9d1122f
--- /dev/null
+++ b/hypervideo_dl/extractor/npr.py
@@ -0,0 +1,126 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ qualities,
+ url_or_none,
+)
+
+
+class NprIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?npr\.org/(?:sections/[^/]+/)?\d{4}/\d{2}/\d{2}/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.npr.org/sections/allsongs/2015/10/21/449974205/new-music-from-beach-house-chairlift-cmj-discoveries-and-more',
+ 'info_dict': {
+ 'id': '449974205',
+ 'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More'
+ },
+ 'playlist_count': 7,
+ }, {
+ 'url': 'https://www.npr.org/sections/deceptivecadence/2015/10/09/446928052/music-from-the-shadows-ancient-armenian-hymns-and-piano-jazz',
+ 'info_dict': {
+ 'id': '446928052',
+ 'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'"
+ },
+ 'playlist': [{
+ 'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
+ 'info_dict': {
+ 'id': '446929930',
+ 'ext': 'mp3',
+ 'title': 'Your Mercy is Boundless (Bazum en Qo gtutyunqd)',
+ 'duration': 402,
+ },
+ }],
+ }, {
+ # multimedia, not media title
+ 'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
+ 'info_dict': {
+ 'id': '533198237',
+ 'title': 'Tigers Jaw: Tiny Desk Concert',
+ },
+ 'playlist': [{
+ 'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
+ 'info_dict': {
+ 'id': '533201718',
+ 'ext': 'mp4',
+ 'title': 'Tigers Jaw: Tiny Desk Concert',
+ 'duration': 402,
+ },
+ }],
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ # multimedia, no formats, stream
+ 'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ story = self._download_json(
+ 'http://api.npr.org/query', playlist_id, query={
+ 'id': playlist_id,
+ 'fields': 'audio,multimedia,title',
+ 'format': 'json',
+ 'apiKey': 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010',
+ })['list']['story'][0]
+ playlist_title = story.get('title', {}).get('$text')
+
+ KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')
+ quality = qualities(KNOWN_FORMATS)
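+ # qualities() scores a format id by its position in KNOWN_FORMATS,
+ # so later entries (mp4, mp3) are preferred when formats are sorted.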
+
+ entries = []
+ for media in story.get('audio', []) + story.get('multimedia', []):
+ media_id = media['id']
+
+ formats = []
+ for format_id, formats_entry in media.get('format', {}).items():
+ if not formats_entry:
+ continue
+ if isinstance(formats_entry, list):
+ formats_entry = formats_entry[0]
+ format_url = formats_entry.get('$text')
+ if not format_url:
+ continue
+ if format_id in KNOWN_FORMATS:
+ if format_id == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, media_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif format_id == 'smil':
+ smil_formats = self._extract_smil_formats(
+ format_url, media_id, transform_source=lambda s: s.replace(
+ 'rtmp://flash.npr.org/ondemand/', 'https://ondemand.npr.org/'))
+ self._check_formats(smil_formats, media_id)
+ formats.extend(smil_formats)
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ })
+ for stream_id, stream_entry in media.get('stream', {}).items():
+ if not isinstance(stream_entry, dict):
+ continue
+ if stream_id != 'hlsUrl':
+ continue
+ stream_url = url_or_none(stream_entry.get('$text'))
+ if not stream_url:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, stream_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': media_id,
+ 'title': media.get('title', {}).get('$text') or playlist_title,
+ 'thumbnail': media.get('altImageUrl', {}).get('$text'),
+ 'duration': int_or_none(media.get('duration', {}).get('$text')),
+ 'formats': formats,
+ })
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/hypervideo_dl/extractor/nrk.py b/hypervideo_dl/extractor/nrk.py
new file mode 100644
index 0000000..40dee21
--- /dev/null
+++ b/hypervideo_dl/extractor/nrk.py
@@ -0,0 +1,876 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ str_or_none,
+ try_get,
+ urljoin,
+ url_or_none,
+)
+
+
+class NRKBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['NO']
+ _CDN_REPL_REGEX = r'''(?x)://
+ (?:
+ nrkod\d{1,2}-httpcache0-47115-cacheod0\.dna\.ip-only\.net/47115-cacheod0|
+ nrk-od-no\.telenorcdn\.net|
+ minicdn-od\.nrk\.no/od/nrkhd-osl-rr\.netwerk\.no/no
+ )/'''
+
+ def _extract_nrk_formats(self, asset_url, video_id):
+ if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
+ return self._extract_akamai_formats(asset_url, video_id)
+ asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url)
+ formats = self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', 'm3u8_native', fatal=False)
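+ # If the primary CDN yields no formats, retry through a random
+ # nrk-od-NN.akamaized.net shard (00-99) by rewriting the host part.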
+ if not formats and re.search(self._CDN_REPL_REGEX, asset_url):
+ formats = self._extract_m3u8_formats(
+ re.sub(self._CDN_REPL_REGEX, '://nrk-od-%02d.akamaized.net/no/' % random.randint(0, 99), asset_url),
+ video_id, 'mp4', 'm3u8_native', fatal=False)
+ return formats
+
+ def _raise_error(self, data):
+ MESSAGES = {
+ 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
+ 'ProgramRightsHasExpired': 'Programmet har gått ut',
+ 'NoProgramRights': 'Ikke tilgjengelig',
+ 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
+ }
+ message_type = data.get('messageType', '')
+ # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
+ if 'IsGeoBlocked' in message_type or try_get(data, lambda x: x['usageRights']['isGeoBlocked']) is True:
+ self.raise_geo_restricted(
+ msg=MESSAGES.get('ProgramIsGeoBlocked'),
+ countries=self._GEO_COUNTRIES)
+ message = data.get('endUserMessage') or MESSAGES.get(message_type, message_type)
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+
+ def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
+ return self._download_json(
+ urljoin('http://psapi.nrk.no/', path),
+ video_id, note or 'Downloading %s JSON' % item,
+ fatal=fatal, query=query,
+ headers={'Accept-Encoding': 'gzip, deflate, br'})
+
+
+class NRKIE(NRKBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ nrk:|
+ https?://
+ (?:
+ (?:www\.)?nrk\.no/video/(?:PS\*|[^_]+_)|
+ v8[-.]psapi\.nrk\.no/mediaelement/
+ )
+ )
+ (?P<id>[^?\#&]+)
+ '''
+
+ _TESTS = [{
+ # video
+ 'url': 'http://www.nrk.no/video/PS*150533',
+ 'md5': 'f46be075326e23ad0e524edfcb06aeb6',
+ 'info_dict': {
+ 'id': '150533',
+ 'ext': 'mp4',
+ 'title': 'Dompap og andre fugler i Piip-Show',
+ 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
+ 'duration': 262,
+ }
+ }, {
+ # audio
+ 'url': 'http://www.nrk.no/video/PS*154915',
+ # MD5 is unstable
+ 'info_dict': {
+ 'id': '154915',
+ 'ext': 'mp4',
+ 'title': 'Slik høres internett ut når du er blind',
+ 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
+ 'duration': 20,
+ }
+ }, {
+ 'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
+ 'only_matching': True,
+ }, {
+ 'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.nrk.no/video/dompap-og-andre-fugler-i-piip-show_150533',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999',
+ 'only_matching': True,
+ }, {
+ # podcast
+ 'url': 'nrk:l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
+ 'only_matching': True,
+ }, {
+ 'url': 'nrk:podcast/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
+ 'only_matching': True,
+ }, {
+ # clip
+ 'url': 'nrk:150533',
+ 'only_matching': True,
+ }, {
+ 'url': 'nrk:clip/150533',
+ 'only_matching': True,
+ }, {
+ # program
+ 'url': 'nrk:MDDP12000117',
+ 'only_matching': True,
+ }, {
+ 'url': 'nrk:program/ENRK10100318',
+ 'only_matching': True,
+ }, {
+ # direkte
+ 'url': 'nrk:nrk1',
+ 'only_matching': True,
+ }, {
+ 'url': 'nrk:channel/nrk1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).split('/')[-1]
+
+ path_templ = 'playback/%s/' + video_id
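+        # item expands to e.g. 'playback/manifest/<video_id>' or 'playback/metadata/<video_id>'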
+
+ def call_playback_api(item, query=None):
+ return self._call_api(path_templ % item, video_id, item, query=query)
+ # known values for preferredCdn: akamai, iponly, minicdn and telenor
+ manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'})
+
+ video_id = try_get(manifest, lambda x: x['id'], compat_str) or video_id
+
+ if manifest.get('playability') == 'nonPlayable':
+ self._raise_error(manifest['nonPlayable'])
+
+ playable = manifest['playable']
+
+ formats = []
+ for asset in playable['assets']:
+ if not isinstance(asset, dict):
+ continue
+ if asset.get('encrypted'):
+ continue
+ format_url = url_or_none(asset.get('url'))
+ if not format_url:
+ continue
+ asset_format = (asset.get('format') or '').lower()
+ if asset_format == 'hls' or determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_nrk_formats(format_url, video_id))
+ elif asset_format == 'mp3':
+ formats.append({
+ 'url': format_url,
+ 'format_id': asset_format,
+ 'vcodec': 'none',
+ })
+ self._sort_formats(formats)
+
+ data = call_playback_api('metadata')
+
+ preplay = data['preplay']
+ titles = preplay['titles']
+ title = titles['title']
+ alt_title = titles.get('subtitle')
+
+ description = preplay.get('description')
+ duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration'))
+
+ thumbnails = []
+ for image in try_get(
+ preplay, lambda x: x['poster']['images'], list) or []:
+ if not isinstance(image, dict):
+ continue
+ image_url = url_or_none(image.get('url'))
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('pixelWidth')),
+ 'height': int_or_none(image.get('pixelHeight')),
+ })
+
+ subtitles = {}
+ for sub in try_get(playable, lambda x: x['subtitles'], list) or []:
+ if not isinstance(sub, dict):
+ continue
+ sub_url = url_or_none(sub.get('webVtt'))
+ if not sub_url:
+ continue
+ sub_key = str_or_none(sub.get('language')) or 'nb'
+ sub_type = str_or_none(sub.get('type'))
+ if sub_type:
+ sub_key += '-%s' % sub_type
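+            # yields keys like 'nb-nor' or 'nb-ttv' (language plus track type)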
+ subtitles.setdefault(sub_key, []).append({
+ 'url': sub_url,
+ })
+
+ legal_age = try_get(
+ data, lambda x: x['legalAge']['body']['rating']['code'], compat_str)
+ # https://en.wikipedia.org/wiki/Norwegian_Media_Authority
+ age_limit = None
+ if legal_age:
+ if legal_age == 'A':
+ age_limit = 0
+ elif legal_age.isdigit():
+ age_limit = int_or_none(legal_age)
+
+ is_series = try_get(data, lambda x: x['_links']['series']['name']) == 'series'
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'alt_title': alt_title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnails': thumbnails,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ if is_series:
+ series = season_id = season_number = episode = episode_number = None
+ programs = self._call_api(
+ 'programs/%s' % video_id, video_id, 'programs', fatal=False)
+ if programs and isinstance(programs, dict):
+ series = str_or_none(programs.get('seriesTitle'))
+ season_id = str_or_none(programs.get('seasonId'))
+ season_number = int_or_none(programs.get('seasonNumber'))
+ episode = str_or_none(programs.get('episodeTitle'))
+ episode_number = int_or_none(programs.get('episodeNumber'))
+ if not series:
+ series = title
+ if alt_title:
+ title += ' - %s' % alt_title
+ if not season_number:
+ season_number = int_or_none(self._search_regex(
+ r'Sesong\s+(\d+)', description or '', 'season number',
+ default=None))
+ if not episode:
+ episode = alt_title if is_series else None
+ if not episode_number:
+ episode_number = int_or_none(self._search_regex(
+ r'^(\d+)\.', episode or '', 'episode number',
+ default=None))
+ if not episode_number:
+ episode_number = int_or_none(self._search_regex(
+ r'\((\d+)\s*:\s*\d+\)', description or '',
+ 'episode number', default=None))
+ info.update({
+ 'title': title,
+ 'series': series,
+ 'season_id': season_id,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ })
+
+ return info
+
+
+class NRKTVIE(InfoExtractor):
+ IE_DESC = 'NRK TV and NRK Radio'
+ _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
+ _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/program/MDDP12000117',
+ 'md5': 'c4a5960f1b00b40d47db65c1064e0ab1',
+ 'info_dict': {
+ 'id': 'MDDP12000117',
+ 'ext': 'mp4',
+ 'title': 'Alarm Trolltunga',
+ 'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
+ 'duration': 2223.44,
+ 'age_limit': 6,
+ 'subtitles': {
+ 'nb-nor': [{
+ 'ext': 'vtt',
+ }],
+ 'nb-ttv': [{
+ 'ext': 'vtt',
+ }]
+ },
+ },
+ }, {
+ 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
+ 'md5': '8d40dab61cea8ab0114e090b029a0565',
+ 'info_dict': {
+ 'id': 'MUHH48000314',
+ 'ext': 'mp4',
+ 'title': '20 spørsmål - 23. mai 2014',
+ 'alt_title': '23. mai 2014',
+ 'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
+ 'duration': 1741,
+ 'series': '20 spørsmål',
+ 'episode': '23. mai 2014',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'https://tv.nrk.no/program/mdfp15000514',
+ 'info_dict': {
+ 'id': 'MDFP15000514',
+ 'ext': 'mp4',
+ 'title': 'Kunnskapskanalen - Grunnlovsjubiléet - Stor ståhei for ingenting',
+ 'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
+ 'duration': 4605.08,
+ 'series': 'Kunnskapskanalen',
+ 'episode': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # single playlist video
+ 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
+ 'info_dict': {
+ 'id': 'MSPO40010515',
+ 'ext': 'mp4',
+ 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
+ 'description': 'md5:c03aba1e917561eface5214020551b7a',
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ 'skip': 'particular part is not supported currently',
+ }, {
+ 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
+ 'info_dict': {
+ 'id': 'MSPO40010515',
+ 'ext': 'mp4',
+ 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
+ 'description': 'md5:c03aba1e917561eface5214020551b7a',
+ 'age_limit': 0,
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ 'skip': 'Ikke tilgjengelig utenfor Norge',
+ }, {
+ 'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
+ 'info_dict': {
+ 'id': 'KMTE50001317',
+ 'ext': 'mp4',
+ 'title': 'Anno - 13. episode',
+ 'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
+ 'duration': 2340,
+ 'series': 'Anno',
+ 'episode': '13. episode',
+ 'season_number': 3,
+ 'episode_number': 13,
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
+ 'info_dict': {
+ 'id': 'MUHH46000317',
+ 'ext': 'mp4',
+ 'title': 'Nytt på Nytt 27.01.2017',
+ 'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
+ 'duration': 1796,
+ 'series': 'Nytt på nytt',
+ 'episode': '27.01.2017',
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'ProgramRightsHasExpired',
+ }, {
+ 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201507/NPUB21019315',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'nrk:%s' % video_id, ie=NRKIE.ie_key(), video_id=video_id)
+
+
+class NRKTVEpisodeIE(InfoExtractor):
+ _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/(?P<season_number>\d+)/episode/(?P<episode_number>\d+))'
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
+ 'info_dict': {
+ 'id': 'MUHH36005220',
+ 'ext': 'mp4',
+ 'title': 'Hellums kro - 2. Kro, krig og kjærlighet',
+ 'description': 'md5:ad92ddffc04cea8ce14b415deef81787',
+ 'duration': 1563.92,
+ 'series': 'Hellums kro',
+ 'season_number': 1,
+ 'episode_number': 2,
+ 'episode': '2. Kro, krig og kjærlighet',
+ 'age_limit': 6,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
+ 'info_dict': {
+ 'id': 'MSUI14000816',
+ 'ext': 'mp4',
+ 'title': 'Backstage - 8. episode',
+ 'description': 'md5:de6ca5d5a2d56849e4021f2bf2850df4',
+ 'duration': 1320,
+ 'series': 'Backstage',
+ 'season_number': 1,
+ 'episode_number': 8,
+ 'episode': '8. episode',
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'ProgramRightsHasExpired',
+ }]
+
+ def _real_extract(self, url):
+ display_id, season_number, episode_number = re.match(self._VALID_URL, url).groups()
+
+ webpage = self._download_webpage(url, display_id)
+
+ info = self._search_json_ld(webpage, display_id, default={})
+ nrk_id = info.get('@id') or self._html_search_meta(
+ 'nrk:program-id', webpage, default=None) or self._search_regex(
+ r'data-program-id=["\'](%s)' % NRKTVIE._EPISODE_RE, webpage,
+ 'nrk id')
+ assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
+
+ info.update({
+ '_type': 'url',
+ 'id': nrk_id,
+ 'url': 'nrk:%s' % nrk_id,
+ 'ie_key': NRKIE.ie_key(),
+ 'season_number': int(season_number),
+ 'episode_number': int(episode_number),
+ })
+ return info
+
+
+class NRKTVSerieBaseIE(NRKBaseIE):
+ def _extract_entries(self, entry_list):
+ if not isinstance(entry_list, list):
+ return []
+ entries = []
+ for episode in entry_list:
+ nrk_id = episode.get('prfId') or episode.get('episodeId')
+ if not nrk_id or not isinstance(nrk_id, compat_str):
+ continue
+ entries.append(self.url_result(
+ 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
+ return entries
+
+ _ASSETS_KEYS = ('episodes', 'instalments',)
+
+ def _extract_assets_key(self, embedded):
+ for asset_key in self._ASSETS_KEYS:
+ if embedded.get(asset_key):
+ return asset_key
+
+ @staticmethod
+ def _catalog_name(serie_kind):
+ return 'podcast' if serie_kind in ('podcast', 'podkast') else 'series'
+
+ def _entries(self, data, display_id):
+ for page_num in itertools.count(1):
+ embedded = data.get('_embedded') or data
+ if not isinstance(embedded, dict):
+ break
+ assets_key = self._extract_assets_key(embedded)
+ if not assets_key:
+ break
+ # Extract entries
+ entries = try_get(
+ embedded,
+ (lambda x: x[assets_key]['_embedded'][assets_key],
+ lambda x: x[assets_key]),
+ list)
+ for e in self._extract_entries(entries):
+ yield e
+ # Find next URL
+ next_url_path = try_get(
+ data,
+ (lambda x: x['_links']['next']['href'],
+ lambda x: x['_embedded'][assets_key]['_links']['next']['href']),
+ compat_str)
+ if not next_url_path:
+ break
+ data = self._call_api(
+ next_url_path, display_id,
+ note='Downloading %s JSON page %d' % (assets_key, page_num),
+ fatal=False)
+ if not data:
+ break
+
+
+class NRKTVSeasonIE(NRKTVSerieBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?P<domain>tv|radio)\.nrk\.no/
+ (?P<serie_kind>serie|pod[ck]ast)/
+ (?P<serie>[^/]+)/
+ (?:
+ (?:sesong/)?(?P<id>\d+)|
+ sesong/(?P<id_2>[^/?#&]+)
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
+ 'info_dict': {
+ 'id': 'backstage/1',
+ 'title': 'Sesong 1',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # no /sesong/ in path
+ 'url': 'https://tv.nrk.no/serie/lindmo/2016',
+ 'info_dict': {
+ 'id': 'lindmo/2016',
+ 'title': '2016',
+ },
+ 'playlist_mincount': 29,
+ }, {
+ # weird nested _embedded in catalog JSON response
+ 'url': 'https://radio.nrk.no/serie/dickie-dick-dickens/sesong/1',
+ 'info_dict': {
+ 'id': 'dickie-dick-dickens/1',
+ 'title': 'Sesong 1',
+ },
+ 'playlist_mincount': 11,
+ }, {
+ # 841 entries, multi page
+ 'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201509',
+ 'info_dict': {
+ 'id': 'dagsnytt/201509',
+ 'title': 'September 2015',
+ },
+ 'playlist_mincount': 841,
+ }, {
+ # 180 entries, single page
+ 'url': 'https://tv.nrk.no/serie/spangas/sesong/1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/podkast/hele_historien/sesong/diagnose-kverulant',
+ 'info_dict': {
+ 'id': 'hele_historien/diagnose-kverulant',
+ 'title': 'Diagnose kverulant',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'https://radio.nrk.no/podkast/loerdagsraadet/sesong/202101',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url) or NRKRadioPodkastIE.suitable(url)
+ else super(NRKTVSeasonIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ domain = mobj.group('domain')
+ serie_kind = mobj.group('serie_kind')
+ serie = mobj.group('serie')
+ season_id = mobj.group('id') or mobj.group('id_2')
+ display_id = '%s/%s' % (serie, season_id)
+
+ data = self._call_api(
+ '%s/catalog/%s/%s/seasons/%s'
+ % (domain, self._catalog_name(serie_kind), serie, season_id),
+ display_id, 'season', query={'pageSize': 50})
+
+ title = try_get(data, lambda x: x['titles']['title'], compat_str) or display_id
+ return self.playlist_result(
+ self._entries(data, display_id),
+ display_id, title)
+
+
+class NRKTVSeriesIE(NRKTVSerieBaseIE):
+ _VALID_URL = r'https?://(?P<domain>(?:tv|radio)\.nrk|(?:tv\.)?nrksuper)\.no/(?P<serie_kind>serie|pod[ck]ast)/(?P<id>[^/]+)'
+ _TESTS = [{
+ # new layout, instalments
+ 'url': 'https://tv.nrk.no/serie/groenn-glede',
+ 'info_dict': {
+ 'id': 'groenn-glede',
+ 'title': 'Grønn glede',
+ 'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
+ },
+ 'playlist_mincount': 90,
+ }, {
+ # new layout, instalments, more entries
+ 'url': 'https://tv.nrk.no/serie/lindmo',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/blank',
+ 'info_dict': {
+ 'id': 'blank',
+ 'title': 'Blank',
+ 'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # new layout, seasons
+ 'url': 'https://tv.nrk.no/serie/backstage',
+ 'info_dict': {
+ 'id': 'backstage',
+ 'title': 'Backstage',
+ 'description': 'md5:63692ceb96813d9a207e9910483d948b',
+ },
+ 'playlist_mincount': 60,
+ }, {
+ # old layout
+ 'url': 'https://tv.nrksuper.no/serie/labyrint',
+ 'info_dict': {
+ 'id': 'labyrint',
+ 'title': 'Labyrint',
+ 'description': 'I Daidalos sin undersjøiske Labyrint venter spennende oppgaver, skumle robotskapninger og slim.',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/saving-the-human-race',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/postmann-pat',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/serie/dickie-dick-dickens',
+ 'info_dict': {
+ 'id': 'dickie-dick-dickens',
+ 'title': 'Dickie Dick Dickens',
+ 'description': 'md5:19e67411ffe57f7dce08a943d7a0b91f',
+ },
+ 'playlist_mincount': 8,
+ }, {
+ 'url': 'https://nrksuper.no/serie/labyrint',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/podkast/ulrikkes_univers',
+ 'info_dict': {
+ 'id': 'ulrikkes_univers',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ 'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/nrkno-poddkast-26588-134079-05042018030000',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (
+ False if any(ie.suitable(url)
+ for ie in (NRKTVIE, NRKTVEpisodeIE, NRKRadioPodkastIE, NRKTVSeasonIE))
+ else super(NRKTVSeriesIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ site, serie_kind, series_id = re.match(self._VALID_URL, url).groups()
+ is_radio = site == 'radio.nrk'
+ domain = 'radio' if is_radio else 'tv'
+
+ size_prefix = 'p' if is_radio else 'embeddedInstalmentsP'
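+        # the query key below becomes 'pageSize' (radio) or 'embeddedInstalmentsPageSize' (TV)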
+ series = self._call_api(
+ '%s/catalog/%s/%s'
+ % (domain, self._catalog_name(serie_kind), series_id),
+ series_id, 'serie', query={size_prefix + 'ageSize': 50})
+ titles = try_get(series, [
+ lambda x: x['titles'],
+ lambda x: x[x['type']]['titles'],
+ lambda x: x[x['seriesType']]['titles'],
+ ]) or {}
+
+ entries = []
+ entries.extend(self._entries(series, series_id))
+ embedded = series.get('_embedded') or {}
+ linked_seasons = try_get(series, lambda x: x['_links']['seasons']) or []
+ embedded_seasons = embedded.get('seasons') or []
+ if len(linked_seasons) > len(embedded_seasons):
+ for season in linked_seasons:
+ season_url = urljoin(url, season.get('href'))
+ if not season_url:
+ season_name = season.get('name')
+ if season_name and isinstance(season_name, compat_str):
+ season_url = 'https://%s.nrk.no/serie/%s/sesong/%s' % (domain, series_id, season_name)
+ if season_url:
+ entries.append(self.url_result(
+ season_url, ie=NRKTVSeasonIE.ie_key(),
+ video_title=season.get('title')))
+ else:
+ for season in embedded_seasons:
+ entries.extend(self._entries(season, series_id))
+ entries.extend(self._entries(
+ embedded.get('extraMaterial') or {}, series_id))
+
+ return self.playlist_result(
+ entries, series_id, titles.get('title'), titles.get('subtitle'))
+
+
+class NRKTVDirekteIE(NRKTVIE):
+ IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
+ _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/direkte/nrk1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus',
+ 'only_matching': True,
+ }]
+
+
+class NRKRadioPodkastIE(InfoExtractor):
+ _VALID_URL = r'https?://radio\.nrk\.no/pod[ck]ast/(?:[^/]+/)+(?P<id>l_[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+
+ _TESTS = [{
+ 'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
+ 'md5': '8d40dab61cea8ab0114e090b029a0565',
+ 'info_dict': {
+ 'id': 'MUHH48000314AA',
+ 'ext': 'mp4',
+ 'title': '20 spørsmål 23.05.2014',
+ 'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
+ 'duration': 1741,
+ 'series': '20 spørsmål',
+ 'episode': '23.05.2014',
+ },
+ }, {
+ 'url': 'https://radio.nrk.no/podcast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/sesong/1/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/podkast/hele_historien/sesong/bortfoert-i-bergen/l_774d1a2c-7aa7-4965-8d1a-2c7aa7d9652c',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'nrk:%s' % video_id, ie=NRKIE.ie_key(), video_id=video_id)
+
+
+class NRKPlaylistBaseIE(InfoExtractor):
+ def _extract_description(self, webpage):
+ pass
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('nrk:%s' % video_id, NRKIE.ie_key())
+ for video_id in re.findall(self._ITEM_RE, webpage)
+ ]
+
+        playlist_title = self._extract_title(webpage)
+ playlist_description = self._extract_description(webpage)
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
+
+class NRKPlaylistIE(NRKPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
+ _ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
+ _TESTS = [{
+ 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
+ 'info_dict': {
+ 'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
+ 'title': 'Gjenopplev den historiske solformørkelsen',
+ 'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
+ 'info_dict': {
+ 'id': 'rivertonprisen-til-karin-fossum-1.12266449',
+ 'title': 'Rivertonprisen til Karin Fossum',
+ 'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
+ },
+ 'playlist_count': 2,
+ }]
+
+ def _extract_title(self, webpage):
+ return self._og_search_title(webpage, fatal=False)
+
+ def _extract_description(self, webpage):
+ return self._og_search_description(webpage)
+
+
+class NRKTVEpisodesIE(NRKPlaylistBaseIE):
+ _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
+ _ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
+ 'info_dict': {
+ 'id': '69031',
+ 'title': 'Nytt på nytt, sesong: 201210',
+ },
+ 'playlist_count': 4,
+ }]
+
+ def _extract_title(self, webpage):
+ return self._html_search_regex(
+ r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
+
+
+class NRKSkoleIE(InfoExtractor):
+ IE_DESC = 'NRK Skole'
+ _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099',
+ 'md5': '18c12c3d071953c3bf8d54ef6b2587b7',
+ 'info_dict': {
+ 'id': '6021',
+ 'ext': 'mp4',
+ 'title': 'Genetikk og eneggede tvillinger',
+ 'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d',
+ 'duration': 399,
+ },
+ }, {
+ 'url': 'https://www.nrk.no/skole/?page=objectives&subject=naturfag&objective=K15114&mediaId=19355',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ nrk_id = self._download_json(
+ 'https://nrkno-skole-prod.kube.nrk.no/skole/api/media/%s' % video_id,
+ video_id)['psId']
+
+ return self.url_result('nrk:%s' % nrk_id)
diff --git a/hypervideo_dl/extractor/nrl.py b/hypervideo_dl/extractor/nrl.py
new file mode 100644
index 0000000..22a2df8
--- /dev/null
+++ b/hypervideo_dl/extractor/nrl.py
@@ -0,0 +1,30 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class NRLTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nrl\.com/tv(?:/[^/]+)*/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://www.nrl.com/tv/news/match-highlights-titans-v-knights-862805/',
+ 'info_dict': {
+ 'id': 'YyNnFuaDE6kPJqlDhG4CGQ_w89mKTau4',
+ 'ext': 'mp4',
+ 'title': 'Match Highlights: Titans v Knights',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ 'format': 'bestvideo',
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ q_data = self._parse_json(self._html_search_regex(
+ r'(?s)q-data="({.+?})"', webpage, 'player data'), display_id)
+ ooyala_id = q_data['videoId']
+ return self.url_result(
+ 'ooyala:' + ooyala_id, 'Ooyala', ooyala_id, q_data.get('title'))
diff --git a/hypervideo_dl/extractor/ntvcojp.py b/hypervideo_dl/extractor/ntvcojp.py
new file mode 100644
index 0000000..0c8221b
--- /dev/null
+++ b/hypervideo_dl/extractor/ntvcojp.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ smuggle_url,
+)
+
+
+class NTVCoJpCUIE(InfoExtractor):
+ IE_NAME = 'cu.ntv.co.jp'
+ IE_DESC = 'Nippon Television Network'
+ _VALID_URL = r'https?://cu\.ntv\.co\.jp/(?!program)(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://cu.ntv.co.jp/televiva-chill-gohan_181031/',
+ 'info_dict': {
+ 'id': '5978891207001',
+ 'ext': 'mp4',
+ 'title': '桜エビと炒り卵がポイント! 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸',
+ 'upload_date': '20181213',
+ 'description': 'md5:211b52f4fd60f3e0e72b68b0c6ba52a9',
+ 'uploader_id': '3855502814001',
+ 'timestamp': 1544669941,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
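+    # expands to e.g. http://players.brightcove.net/3855502814001/default_default/index.html?videoId=5978891207001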
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ player_config = self._parse_json(self._search_regex(
+ r'(?s)PLAYER_CONFIG\s*=\s*({.+?})',
+ webpage, 'player config'), display_id, js_to_json)
+ video_id = player_config['videoId']
+ account_id = player_config.get('account') or '3855502814001'
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': self._search_regex(r'<h1[^>]+class="title"[^>]*>([^<]+)', webpage, 'title').strip(),
+ 'description': self._html_search_meta(['description', 'og:description'], webpage),
+ 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), {'geo_countries': ['JP']}),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/hypervideo_dl/extractor/ntvde.py b/hypervideo_dl/extractor/ntvde.py
new file mode 100644
index 0000000..101a537
--- /dev/null
+++ b/hypervideo_dl/extractor/ntvde.py
@@ -0,0 +1,77 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ parse_duration,
+)
+
+
+class NTVDeIE(InfoExtractor):
+ IE_NAME = 'n-tv.de'
+ _VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/videos/[^/?#]+/[^/?#]+-article(?P<id>.+)\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.n-tv.de/mediathek/videos/panorama/Schnee-und-Glaette-fuehren-zu-zahlreichen-Unfaellen-und-Staus-article14438086.html',
+ 'md5': '6ef2514d4b1e8e03ca24b49e2f167153',
+ 'info_dict': {
+ 'id': '14438086',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'title': 'Schnee und Glätte führen zu zahlreichen Unfällen und Staus',
+ 'alt_title': 'Winterchaos auf deutschen Straßen',
+ 'description': 'Schnee und Glätte sorgen deutschlandweit für einen chaotischen Start in die Woche: Auf den Straßen kommt es zu kilometerlangen Staus und Dutzenden Glätteunfällen. In Düsseldorf und München wirbelt der Schnee zudem den Flugplan durcheinander. Dutzende Flüge landen zu spät, einige fallen ganz aus.',
+ 'duration': 4020,
+ 'timestamp': 1422892797,
+ 'upload_date': '20150202',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ info = self._parse_json(self._search_regex(
+ r'(?s)ntv\.pageInfo\.article\s*=\s*(\{.*?\});', webpage, 'info'),
+ video_id, transform_source=js_to_json)
+ timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
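+        # the player data embeds an inline 'advertising' object that
+        # js_to_json apparently cannot parse, so it is stripped out below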
+ vdata = self._parse_json(self._search_regex(
+ r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);',
+ webpage, 'player data'), video_id,
+ transform_source=lambda s: js_to_json(re.sub(r'advertising:\s*{[^}]+},', '', s)))
+ duration = parse_duration(vdata.get('duration'))
+
+ formats = []
+ if vdata.get('video'):
+ formats.append({
+ 'format_id': 'flash',
+ 'url': 'rtmp://fms.n-tv.de/%s' % vdata['video'],
+ })
+ if vdata.get('videoMp4'):
+ formats.append({
+ 'format_id': 'mobile',
+ 'url': compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoMp4']),
+ 'tbr': 400, # estimation
+ })
+ if vdata.get('videoM3u8'):
+ m3u8_url = compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoM3u8'])
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ preference=0, m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': info['headline'],
+ 'description': info.get('intro'),
+ 'alt_title': info.get('kicker'),
+ 'timestamp': timestamp,
+ 'thumbnail': vdata.get('html5VideoPoster'),
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/ntvru.py b/hypervideo_dl/extractor/ntvru.py
new file mode 100644
index 0000000..c47d1df
--- /dev/null
+++ b/hypervideo_dl/extractor/ntvru.py
@@ -0,0 +1,131 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ strip_or_none,
+ unescapeHTML,
+ xpath_text,
+)
+
+
+class NTVRuIE(InfoExtractor):
+ IE_NAME = 'ntv.ru'
+ _VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.ntv.ru/novosti/863142/',
+ 'md5': 'ba7ea172a91cb83eb734cad18c10e723',
+ 'info_dict': {
+ 'id': '746000',
+ 'ext': 'mp4',
+ 'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
+ 'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 136,
+ },
+ }, {
+ 'url': 'http://www.ntv.ru/video/novosti/750370/',
+ 'md5': 'adecff79691b4d71e25220a191477124',
+ 'info_dict': {
+ 'id': '750370',
+ 'ext': 'mp4',
+ 'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
+ 'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 172,
+ },
+ }, {
+ 'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
+ 'md5': '82dbd49b38e3af1d00df16acbeab260c',
+ 'info_dict': {
+ 'id': '747480',
+ 'ext': 'mp4',
+ 'title': '«Сегодня». 21 марта 2014 года. 16:00',
+ 'description': '«Сегодня». 21 марта 2014 года. 16:00',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 1496,
+ },
+ }, {
+ 'url': 'https://www.ntv.ru/kino/Koma_film/m70281/o336036/video/',
+ 'md5': 'e9c7cde24d9d3eaed545911a04e6d4f4',
+ 'info_dict': {
+ 'id': '1126480',
+ 'ext': 'mp4',
+ 'title': 'Остросюжетный фильм «Кома»',
+ 'description': 'Остросюжетный фильм «Кома»',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 5592,
+ },
+ }, {
+ 'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
+ 'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
+ 'info_dict': {
+ 'id': '751482',
+ 'ext': 'mp4',
+ 'title': '«Дело врачей»: «Деревце жизни»',
+ 'description': '«Дело врачей»: «Деревце жизни»',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 2590,
+ },
+ }, {
+ # Schemeless file URL
+ 'url': 'https://www.ntv.ru/video/1797442',
+ 'only_matching': True,
+ }]
+
+ _VIDEO_ID_REGEXES = [
+ r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
+ r'<video embed=[^>]+><id>(\d+)</id>',
+ r'<video restriction[^>]+><key>(\d+)</key>',
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._og_search_property(
+ ('video', 'video:iframe'), webpage, default=None)
+ if video_url:
+ video_id = self._search_regex(
+ r'https?://(?:www\.)?ntv\.ru/video/(?:embed/)?(\d+)',
+ video_url, 'video id', default=None)
+
+ if not video_id:
+ video_id = self._html_search_regex(
+ self._VIDEO_ID_REGEXES, webpage, 'video id')
+
+ player = self._download_xml(
+ 'http://www.ntv.ru/vi%s/' % video_id,
+ video_id, 'Downloading video XML')
+
+ title = strip_or_none(unescapeHTML(xpath_text(player, './data/title', 'title', fatal=True)))
+
+ video = player.find('./data/video')
+
+ formats = []
+ for format_id in ['', 'hi', 'webm']:
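+            # '' -> <file>, 'hi' -> <hifile>, 'webm' -> <webmfile>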
+ file_ = xpath_text(video, './%sfile' % format_id)
+ if not file_:
+ continue
+ if file_.startswith('//'):
+ file_ = self._proto_relative_url(file_)
+ elif not file_.startswith('http'):
+ file_ = 'http://media.ntv.ru/vod/' + file_
+ formats.append({
+ 'url': file_,
+ 'filesize': int_or_none(xpath_text(video, './%ssize' % format_id)),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': xpath_text(video, './id'),
+ 'title': title,
+ 'description': strip_or_none(unescapeHTML(xpath_text(player, './data/description'))),
+ 'thumbnail': xpath_text(video, './splash'),
+ 'duration': int_or_none(xpath_text(video, './totaltime')),
+ 'view_count': int_or_none(xpath_text(video, './views')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/nuevo.py b/hypervideo_dl/extractor/nuevo.py
new file mode 100644
index 0000000..be1e09d
--- /dev/null
+++ b/hypervideo_dl/extractor/nuevo.py
@@ -0,0 +1,39 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+ float_or_none,
+ xpath_text
+)
+
+
+class NuevoBaseIE(InfoExtractor):
+ def _extract_nuevo(self, config_url, video_id, headers={}):
+ config = self._download_xml(
+ config_url, video_id, transform_source=lambda s: s.strip(),
+ headers=headers)
+
+ title = xpath_text(config, './title', 'title', fatal=True).strip()
+ video_id = xpath_text(config, './mediaid', default=video_id)
+ thumbnail = xpath_text(config, ['./image', './thumb'])
+ duration = float_or_none(xpath_text(config, './duration'))
+
+ formats = []
+ for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')):
+ video_url = xpath_text(config, element_name)
+ if video_url:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ })
+ self._check_formats(formats, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats
+ }
diff --git a/hypervideo_dl/extractor/nuvid.py b/hypervideo_dl/extractor/nuvid.py
new file mode 100644
index 0000000..ab6bfcd
--- /dev/null
+++ b/hypervideo_dl/extractor/nuvid.py
@@ -0,0 +1,71 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+)
+
+
+class NuvidIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://m.nuvid.com/video/1310741/',
+ 'md5': 'eab207b7ac4fccfb4e23c86201f11277',
+ 'info_dict': {
+ 'id': '1310741',
+ 'ext': 'mp4',
+ 'title': 'Horny babes show their awesome bodeis and',
+ 'duration': 129,
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ page_url = 'http://m.nuvid.com/video/%s' % video_id
+ webpage = self._download_webpage(
+ page_url, video_id, 'Downloading video page')
+        # When dwnld_speed exists and has a value larger than the MP4 file's
+        # bitrate, Nuvid returns the MP4 URL.
+        # Its unit is 100 bytes/millisecond; see mobile-nuvid-min.js for the algorithm.
+ self._set_cookie('nuvid.com', 'dwnld_speed', '10.0')
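+        # '10.0' therefore advertises 10 * 100 bytes/ms = 1 MB/s (~8 Mbit/s),
+        # well above any plausible MP4 bitrate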
+ mp4_webpage = self._download_webpage(
+ page_url, video_id, 'Downloading video page for MP4 format')
+
+        html5_video_re = r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']'
+        video_url = self._html_search_regex(html5_video_re, webpage, 'video URL')
+        mp4_video_url = self._html_search_regex(html5_video_re, mp4_webpage, 'MP4 video URL')
+ formats = [{
+ 'url': video_url,
+ }]
+ if mp4_video_url != video_url:
+ formats.append({
+ 'url': mp4_video_url,
+ })
+
+ title = self._html_search_regex(
+ [r'<span title="([^"]+)">',
+ r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>',
+ r'<span[^>]+class="title_thumb">([^<]+)</span>'], webpage, 'title').strip()
+ thumbnails = [
+ {
+ 'url': thumb_url,
+ } for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
+ ]
+ thumbnail = thumbnails[0]['url'] if thumbnails else None
+ duration = parse_duration(self._html_search_regex(
+ [r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})',
+ r'<span[^>]+class="view_time">([^<]+)</span>'], webpage, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnails': thumbnails,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': 18,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/nytimes.py b/hypervideo_dl/extractor/nytimes.py
new file mode 100644
index 0000000..976b1c6
--- /dev/null
+++ b/hypervideo_dl/extractor/nytimes.py
@@ -0,0 +1,261 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hmac
+import hashlib
+import base64
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ parse_iso8601,
+ remove_start,
+)
+
+
+class NYTimesBaseIE(InfoExtractor):
+ _SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v'
+
+ def _extract_video_from_id(self, video_id):
+ # Authorization generation algorithm is reverse engineered from `signer` in
+ # http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js
+ path = '/svc/video/api/v3/video/' + video_id
+ hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest()
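+        # e.g. for video 100000002847155 the signed message is
+        # '/svc/video/api/v3/video/100000002847155:vhs'; the header sent below
+        # is then 'Authorization: NYTV <base64 of the hex SHA-512 digest>'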
+ video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={
+ 'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(),
+ 'X-NYTV': 'vhs',
+ }, fatal=False)
+ if not video_data:
+ video_data = self._download_json(
+ 'http://www.nytimes.com/svc/video/api/v2/video/' + video_id,
+ video_id, 'Downloading video JSON')
+
+ title = video_data['headline']
+
+ def get_file_size(file_size):
+ if isinstance(file_size, int):
+ return file_size
+ elif isinstance(file_size, dict):
+ return int(file_size.get('value', 0))
+ else:
+ return None
+
+ urls = []
+ formats = []
+ for video in video_data.get('renditions', []):
+ video_url = video.get('url')
+ format_id = video.get('type')
+ if not video_url or format_id == 'thumbs' or video_url in urls:
+ continue
+ urls.append(video_url)
+ ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id or 'hls', fatal=False))
+ elif ext == 'mpd':
+ continue
+ # formats.extend(self._extract_mpd_formats(
+ # video_url, video_id, format_id or 'dash', fatal=False))
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'vcodec': video.get('videoencoding') or video.get('video_codec'),
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ 'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
+ 'tbr': int_or_none(video.get('bitrate'), 1000) or None,
+ 'ext': ext,
+ })
+ self._sort_formats(formats, ('height', 'width', 'filesize', 'tbr', 'fps', 'format_id'))
+
+ thumbnails = []
+ for image in video_data.get('images', []):
+ image_url = image.get('url')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': 'http://www.nytimes.com/' + image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ publication_date = video_data.get('publication_date')
+ timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('summary'),
+ 'timestamp': timestamp,
+ 'uploader': video_data.get('byline'),
+ 'duration': float_or_none(video_data.get('duration'), 1000),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
+
+
+class NYTimesIE(NYTimesBaseIE):
+ _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
+ 'md5': 'd665342765db043f7e225cff19df0f2d',
+ 'info_dict': {
+ 'id': '100000002847155',
+ 'ext': 'mov',
+ 'title': 'Verbatim: What Is a Photocopier?',
+ 'description': 'md5:93603dada88ddbda9395632fdc5da260',
+ 'timestamp': 1398631707,
+ 'upload_date': '20140427',
+ 'uploader': 'Brett Weiner',
+ 'duration': 419,
+ }
+ }, {
+ 'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ return self._extract_video_from_id(video_id)
+
+
+class NYTimesArticleIE(NYTimesBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'
+ _TESTS = [{
+ 'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
+ 'md5': 'e2076d58b4da18e6a001d53fd56db3c9',
+ 'info_dict': {
+ 'id': '100000003628438',
+ 'ext': 'mov',
+ 'title': 'New Minimum Wage: $70,000 a Year',
+ 'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.',
+ 'timestamp': 1429033037,
+ 'upload_date': '20150414',
+ 'uploader': 'Matthew Williams',
+ }
+ }, {
+ 'url': 'http://www.nytimes.com/2016/10/14/podcasts/revelations-from-the-final-weeks.html',
+ 'md5': 'e0d52040cafb07662acf3c9132db3575',
+ 'info_dict': {
+ 'id': '100000004709062',
+ 'title': 'The Run-Up: ‘He Was Like an Octopus’',
+ 'ext': 'mp3',
+ 'description': 'md5:fb5c6b93b12efc51649b4847fe066ee4',
+ 'series': 'The Run-Up',
+ 'episode': '‘He Was Like an Octopus’',
+ 'episode_number': 20,
+ 'duration': 2130,
+ }
+ }, {
+ 'url': 'http://www.nytimes.com/2016/10/16/books/review/inside-the-new-york-times-book-review-the-rise-of-hitler.html',
+ 'info_dict': {
+ 'id': '100000004709479',
+ 'title': 'The Rise of Hitler',
+ 'ext': 'mp3',
+ 'description': 'md5:bce877fd9e3444990cb141875fab0028',
+ 'creator': 'Pamela Paul',
+ 'duration': 3475,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
+ 'only_matching': True,
+ }]
+
+ def _extract_podcast_from_json(self, json, page_id, webpage):
+ podcast_audio = self._parse_json(
+ json, page_id, transform_source=js_to_json)
+
+ audio_data = podcast_audio['data']
+ track = audio_data['track']
+
+ episode_title = track['title']
+ video_url = track['source']
+
+ description = track.get('description') or self._html_search_meta(
+ ['og:description', 'twitter:description'], webpage)
+
+ podcast_title = audio_data.get('podcast', {}).get('title')
+ title = ('%s: %s' % (podcast_title, episode_title)
+ if podcast_title else episode_title)
+
+ episode = audio_data.get('podcast', {}).get('episode') or ''
+ episode_number = int_or_none(self._search_regex(
+ r'[Ee]pisode\s+(\d+)', episode, 'episode number', default=None))
+
+ return {
+ 'id': remove_start(podcast_audio.get('target'), 'FT') or page_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'creator': track.get('credit'),
+ 'series': podcast_title,
+ 'episode': episode_title,
+ 'episode_number': episode_number,
+ 'duration': int_or_none(track.get('duration')),
+ }
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, page_id)
+
+ video_id = self._search_regex(
+ r'data-videoid=["\'](\d+)', webpage, 'video id',
+            default=None)
+ if video_id is not None:
+ return self._extract_video_from_id(video_id)
+
+ podcast_data = self._search_regex(
+ (r'NYTD\.FlexTypes\.push\s*\(\s*({.+?})\s*\)\s*;\s*</script',
+ r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
+ webpage, 'podcast data')
+ return self._extract_podcast_from_json(podcast_data, page_id, webpage)
+
+
+class NYTimesCookingIE(NYTimesBaseIE):
+ _VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
+ 'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3',
+ 'info_dict': {
+ 'id': '100000004756089',
+ 'ext': 'mov',
+ 'timestamp': 1479383008,
+ 'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON',
+ 'title': 'Cranberry Tart',
+ 'upload_date': '20161117',
+ 'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.',
+ },
+ }, {
+ 'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
+ 'md5': '4b2e8c70530a89b8d905a2b572316eb8',
+ 'info_dict': {
+ 'id': '100000003951728',
+ 'ext': 'mov',
+ 'timestamp': 1445509539,
+ 'description': 'Turkey guide',
+ 'upload_date': '20151022',
+ 'title': 'Turkey',
+ }
+ }]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, page_id)
+
+ video_id = self._search_regex(
+ r'data-video-id=["\'](\d+)', webpage, 'video id')
+
+ return self._extract_video_from_id(video_id)
diff --git a/hypervideo_dl/extractor/nzz.py b/hypervideo_dl/extractor/nzz.py
new file mode 100644
index 0000000..61ee77a
--- /dev/null
+++ b/hypervideo_dl/extractor/nzz.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+)
+
+
+class NZZIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
+ 'info_dict': {
+ 'id': '9153',
+ },
+ 'playlist_mincount': 6,
+ }, {
+ 'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112',
+ 'info_dict': {
+ 'id': '1368112',
+ },
+ 'playlist_count': 1,
+ }]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+
+ entries = []
+ for player_element in re.findall(
+ r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
+ player_params = extract_attributes(player_element)
+ if player_params.get('data-type') not in ('kaltura_singleArticle',):
+ self.report_warning('Unsupported player type')
+ continue
+ entry_id = player_params['data-id']
+ entries.append(self.url_result(
+ 'kaltura:1750922:' + entry_id, 'Kaltura', entry_id))
+
+ return self.playlist_result(entries, page_id)
diff --git a/hypervideo_dl/extractor/odatv.py b/hypervideo_dl/extractor/odatv.py
new file mode 100644
index 0000000..314527f
--- /dev/null
+++ b/hypervideo_dl/extractor/odatv.py
@@ -0,0 +1,50 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ NO_DEFAULT,
+ remove_start
+)
+
+
+class OdaTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?odatv\.com/(?:mob|vid)_video\.php\?.*\bid=(?P<id>[^&]+)'
+ _TESTS = [{
+ 'url': 'http://odatv.com/vid_video.php?id=8E388',
+ 'md5': 'dc61d052f205c9bf2da3545691485154',
+ 'info_dict': {
+ 'id': '8E388',
+ 'ext': 'mp4',
+ 'title': 'Artık Davutoğlu ile devam edemeyiz'
+ }
+ }, {
+ # mobile URL
+ 'url': 'http://odatv.com/mob_video.php?id=8E388',
+ 'only_matching': True,
+ }, {
+ # no video
+ 'url': 'http://odatv.com/mob_video.php?id=8E900',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ no_video = 'NO VIDEO!' in webpage
+
+ video_url = self._search_regex(
+ r'mp4\s*:\s*(["\'])(?P<url>http.+?)\1', webpage, 'video url',
+ default=None if no_video else NO_DEFAULT, group='url')
+
+ if no_video:
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': remove_start(self._og_search_title(webpage), 'Video: '),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
diff --git a/hypervideo_dl/extractor/odnoklassniki.py b/hypervideo_dl/extractor/odnoklassniki.py
new file mode 100644
index 0000000..7ed9fac
--- /dev/null
+++ b/hypervideo_dl/extractor/odnoklassniki.py
@@ -0,0 +1,268 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_etree_fromstring,
+ compat_parse_qs,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ unified_strdate,
+ int_or_none,
+ qualities,
+ unescapeHTML,
+ urlencode_postdata,
+)
+
+
+class OdnoklassnikiIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|m|mobile)\.)?
+ (?:odnoklassniki|ok)\.ru/
+ (?:
+ video(?:embed)?/|
+ web-api/video/moviePlayer/|
+ live/|
+ dk\?.*?st\.mvId=
+ )
+ (?P<id>[\d-]+)
+ '''
+ _TESTS = [{
+ # metadata in JSON
+ 'url': 'http://ok.ru/video/20079905452',
+ 'md5': '0b62089b479e06681abaaca9d204f152',
+ 'info_dict': {
+ 'id': '20079905452',
+ 'ext': 'mp4',
+ 'title': 'Культура меняет нас (прекрасный ролик!))',
+ 'duration': 100,
+ 'upload_date': '20141207',
+ 'uploader_id': '330537914540',
+ 'uploader': 'Виталий Добровольский',
+ 'like_count': int,
+ 'age_limit': 0,
+ },
+ }, {
+ # metadataUrl
+ 'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
+ 'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
+ 'info_dict': {
+ 'id': '63567059965189-0',
+ 'ext': 'mp4',
+ 'title': 'Девушка без комплексов ...',
+ 'duration': 191,
+ 'upload_date': '20150518',
+ 'uploader_id': '534380003155',
+ 'uploader': '☭ Андрей Мещанинов ☭',
+ 'like_count': int,
+ 'age_limit': 0,
+ 'start_time': 5,
+ },
+ }, {
+ # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
+ 'url': 'http://ok.ru/video/64211978996595-1',
+ 'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
+ 'info_dict': {
+ 'id': 'V_VztHT5BzY',
+ 'ext': 'mp4',
+ 'title': 'Космическая среда от 26 августа 2015',
+ 'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
+ 'duration': 440,
+ 'upload_date': '20150826',
+ 'uploader_id': 'tvroscosmos',
+ 'uploader': 'Телестудия Роскосмоса',
+ 'age_limit': 0,
+ },
+ }, {
+ # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
+ 'url': 'http://ok.ru/video/62036049272859-0',
+ 'info_dict': {
+ 'id': '62036049272859-0',
+ 'ext': 'mp4',
+ 'title': 'МУЗЫКА ДОЖДЯ .',
+ 'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
+ 'upload_date': '20120106',
+ 'uploader_id': '473534735899',
+ 'uploader': 'МARINA D',
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Video has not been found',
+ }, {
+ 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ok.ru/video/20648036891',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ok.ru/videoembed/20648036891',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.ok.ru/video/20079905452',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mobile.ok.ru/video/20079905452',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ok.ru/live/484531969818',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
+ 'only_matching': True,
+ }, {
+ # Paid video
+ 'url': 'https://ok.ru/video/954886983203',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ start_time = int_or_none(compat_parse_qs(
+ compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
+
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://ok.ru/video/%s' % video_id, video_id)
+
+ error = self._search_regex(
+ r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
+ webpage, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
+ player = self._parse_json(
+ unescapeHTML(self._search_regex(
+ r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
+ webpage, 'player', group='player')),
+ video_id)
+
+ flashvars = player['flashvars']
+
+ metadata = flashvars.get('metadata')
+ if metadata:
+ metadata = self._parse_json(metadata, video_id)
+ else:
+ data = {}
+ st_location = flashvars.get('location')
+ if st_location:
+ data['st.location'] = st_location
+ metadata = self._download_json(
+ compat_urllib_parse_unquote(flashvars['metadataUrl']),
+ video_id, 'Downloading metadata JSON',
+ data=urlencode_postdata(data))
+
+ movie = metadata['movie']
+
+        # Some embedded videos may not contain a title in the movie dict (e.g.
+        # http://ok.ru/video/62036049272859-0), so a missing title is allowed
+        # here; it will be extracted later by the extractor that processes the
+        # actual embed.
+ provider = metadata.get('provider')
+ title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')
+
+ thumbnail = movie.get('poster')
+ duration = int_or_none(movie.get('duration'))
+
+ author = metadata.get('author', {})
+ uploader_id = author.get('id')
+ uploader = author.get('name')
+
+ upload_date = unified_strdate(self._html_search_meta(
+ 'ya:ovs:upload_date', webpage, 'upload date', default=None))
+
+ age_limit = None
+ adult = self._html_search_meta(
+ 'ya:ovs:adult', webpage, 'age limit', default=None)
+ if adult:
+ age_limit = 18 if adult == 'true' else 0
+
+ like_count = int_or_none(metadata.get('likeCount'))
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'like_count': like_count,
+ 'age_limit': age_limit,
+ 'start_time': start_time,
+ }
+
+ if provider == 'USER_YOUTUBE':
+ info.update({
+ '_type': 'url_transparent',
+ 'url': movie['contentId'],
+ })
+ return info
+
+ assert title
+ if provider == 'LIVE_TV_APP':
+ info['title'] = self._live_title(title)
+
+ quality = qualities(('4', '0', '1', '2', '3', '5'))
+
+ formats = [{
+ 'url': f['url'],
+ 'ext': 'mp4',
+ 'format_id': f['name'],
+ } for f in metadata['videos']]
+
+ m3u8_url = metadata.get('hlsManifestUrl')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ dash_manifest = metadata.get('metadataEmbedded')
+ if dash_manifest:
+ formats.extend(self._parse_mpd_formats(
+ compat_etree_fromstring(dash_manifest), 'mpd'))
+
+ for fmt in formats:
+ fmt_type = self._search_regex(
+ r'\btype[/=](\d)', fmt['url'],
+ 'format type', default=None)
+ if fmt_type:
+ fmt['quality'] = quality(fmt_type)
+
+ # Live formats
+ m3u8_url = metadata.get('hlsMasterPlaylistUrl')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8',
+ m3u8_id='hls', fatal=False))
+ rtmp_url = metadata.get('rtmpUrl')
+ if rtmp_url:
+ formats.append({
+ 'url': rtmp_url,
+ 'format_id': 'rtmp',
+ 'ext': 'flv',
+ })
+
+ if not formats:
+ payment_info = metadata.get('paymentInfo')
+ if payment_info:
+ raise ExtractorError('This video is paid, subscribe to download it', expected=True)
+
+ self._sort_formats(formats)
+
+ info['formats'] = formats
+ return info
diff --git a/hypervideo_dl/extractor/oktoberfesttv.py b/hypervideo_dl/extractor/oktoberfesttv.py
new file mode 100644
index 0000000..a914068
--- /dev/null
+++ b/hypervideo_dl/extractor/oktoberfesttv.py
@@ -0,0 +1,47 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class OktoberfestTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)'
+
+ _TEST = {
+ 'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt',
+ 'info_dict': {
+ 'id': 'hb-zelt',
+ 'ext': 'mp4',
+ 'title': 're:^Live-Kamera: Hofbräuzelt [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._live_title(self._html_search_regex(
+ r'<h1><strong>.*?</strong>(.*?)</h1>', webpage, 'title'))
+
+ clip = self._search_regex(
+ r"clip:\s*\{\s*url:\s*'([^']+)'", webpage, 'clip')
+ ncurl = self._search_regex(
+ r"netConnectionUrl:\s*'([^']+)'", webpage, 'rtmp base')
+ video_url = ncurl + clip
+ thumbnail = self._search_regex(
+ r"canvas:\s*\{\s*backgroundImage:\s*'url\(([^)]+)\)'", webpage,
+ 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'is_live': True,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/once.py b/hypervideo_dl/extractor/once.py
new file mode 100644
index 0000000..3e44b78
--- /dev/null
+++ b/hypervideo_dl/extractor/once.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class OnceIE(InfoExtractor):
+ _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
+ ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
+ PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
+
+ def _extract_once_formats(self, url, http_formats_preference=None):
+ domain_id, application_id, media_item_id = re.match(
+ OnceIE._VALID_URL, url).groups()
+ formats = self._extract_m3u8_formats(
+ self.ADAPTIVE_URL_TEMPLATE % (
+ domain_id, application_id, media_item_id),
+ media_item_id, 'mp4', m3u8_id='hls', fatal=False)
+ progressive_formats = []
+ for adaptive_format in formats:
+ # Prevent advertisements from being embedded into the m3u8 playlist (see
+ # https://github.com/ytdl-org/youtube-dl/issues/8893#issuecomment-199912684)
+ adaptive_format['url'] = re.sub(
+ r'\badsegmentlength=\d+', r'adsegmentlength=0', adaptive_format['url'])
+ rendition_id = self._search_regex(
+ r'/now/media/playlist/[^/]+/[^/]+/([^/]+)',
+ adaptive_format['url'], 'rendition id', default=None)
+ if rendition_id:
+ progressive_format = adaptive_format.copy()
+ progressive_format.update({
+ 'url': self.PROGRESSIVE_URL_TEMPLATE % (
+ domain_id, application_id, rendition_id, media_item_id),
+ 'format_id': adaptive_format['format_id'].replace(
+ 'hls', 'http'),
+ 'protocol': 'http',
+ 'preference': http_formats_preference,
+ })
+ progressive_formats.append(progressive_format)
+ self._check_formats(progressive_formats, media_item_id)
+ formats.extend(progressive_formats)
+ return formats
diff --git a/hypervideo_dl/extractor/ondemandkorea.py b/hypervideo_dl/extractor/ondemandkorea.py
new file mode 100644
index 0000000..df1ce3c
--- /dev/null
+++ b/hypervideo_dl/extractor/ondemandkorea.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ js_to_json,
+)
+
+
+class OnDemandKoreaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
+ _GEO_COUNTRIES = ['US', 'CA']
+ _TEST = {
+ 'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
+ 'info_dict': {
+ 'id': 'ask-us-anything-e43',
+ 'ext': 'mp4',
+ 'title': 'Ask Us Anything : E43',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download'
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id, fatal=False)
+
+ if not webpage:
+ # Page sometimes returns captcha page with HTTP 403
+ raise ExtractorError(
+ 'Unable to access page. You may have been blocked.',
+ expected=True)
+
+ if 'msg_block_01.png' in webpage:
+ self.raise_geo_restricted(
+ msg='This content is not available in your region',
+ countries=self._GEO_COUNTRIES)
+
+ if 'This video is only available to ODK PLUS members.' in webpage:
+ raise ExtractorError(
+ 'This video is only available to ODK PLUS members.',
+ expected=True)
+
+ title = self._og_search_title(webpage)
+
+ jw_config = self._parse_json(
+ self._search_regex(
+ r'(?s)jwplayer\(([\'"])(?:(?!\1).)+\1\)\.setup\s*\((?P<options>.+?)\);',
+ webpage, 'jw config', group='options'),
+ video_id, transform_source=js_to_json)
+ info = self._parse_jwplayer_data(
+ jw_config, video_id, require_title=False, m3u8_id='hls',
+ base_url=url)
+
+ info.update({
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ })
+ return info
diff --git a/hypervideo_dl/extractor/onet.py b/hypervideo_dl/extractor/onet.py
new file mode 100644
index 0000000..e55b2ac
--- /dev/null
+++ b/hypervideo_dl/extractor/onet.py
@@ -0,0 +1,268 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ get_element_by_class,
+ int_or_none,
+ js_to_json,
+ NO_DEFAULT,
+ parse_iso8601,
+ remove_start,
+ strip_or_none,
+ url_basename,
+)
+
+
+class OnetBaseIE(InfoExtractor):
+ _URL_BASE_RE = r'https?://(?:(?:www\.)?onet\.tv|onet100\.vod\.pl)/[a-z]/'
+
+ def _search_mvp_id(self, webpage):
+ return self._search_regex(
+ r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
+
+ def _extract_from_id(self, video_id, webpage=None):
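+ # The CKM API takes a JSON-RPC-style request flattened into body[...] query parameters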
+ response = self._download_json(
+ 'http://qi.ckm.onetapi.pl/', video_id,
+ query={
+ 'body[id]': video_id,
+ 'body[jsonrpc]': '2.0',
+ 'body[method]': 'get_asset_detail',
+ 'body[params][ID_Publikacji]': video_id,
+ 'body[params][Service]': 'www.onet.pl',
+ 'content-type': 'application/jsonp',
+ 'x-onet-app': 'player.front.onetapi.pl',
+ })
+
+ error = response.get('error')
+ if error:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error['message']), expected=True)
+
+ video = response['result'].get('0')
+
+ formats = []
+ for format_type, formats_dict in video['formats'].items():
+ if not isinstance(formats_dict, dict):
+ continue
+ for format_id, format_list in formats_dict.items():
+ if not isinstance(format_list, list):
+ continue
+ for f in format_list:
+ video_url = f.get('url')
+ if not video_url:
+ continue
+ ext = determine_ext(video_url)
+ if format_id.startswith('ism'):
+ formats.extend(self._extract_ism_formats(
+ video_url, video_id, 'mss', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ elif format_id.startswith('hls'):
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ http_f = {
+ 'url': video_url,
+ 'format_id': format_id,
+ 'abr': float_or_none(f.get('audio_bitrate')),
+ }
+ if format_type == 'audio':
+ http_f['vcodec'] = 'none'
+ else:
+ http_f.update({
+ 'height': int_or_none(f.get('vertical_resolution')),
+ 'width': int_or_none(f.get('horizontal_resolution')),
+ 'vbr': float_or_none(f.get('video_bitrate')),
+ })
+ formats.append(http_f)
+ self._sort_formats(formats)
+
+ meta = video.get('meta', {})
+
+ title = (self._og_search_title(
+ webpage, default=None) if webpage else None) or meta['title']
+ description = (self._og_search_description(
+ webpage, default=None) if webpage else None) or meta.get('description')
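+ # The API is known to misspell 'length' as 'lenght'; accept both spellings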
+ duration = meta.get('length') or meta.get('lenght')
+ timestamp = parse_iso8601(meta.get('addDate'), ' ')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
+
+
+class OnetMVPIE(OnetBaseIE):
+ _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)'
+
+ _TEST = {
+ 'url': 'onetmvp:381027.1509591944',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ return self._extract_from_id(self._match_id(url))
+
+
+class OnetIE(OnetBaseIE):
+ _VALID_URL = OnetBaseIE._URL_BASE_RE + r'[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
+ IE_NAME = 'onet.tv'
+
+ _TESTS = [{
+ 'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
+ 'md5': '436102770fb095c75b8bb0392d3da9ff',
+ 'info_dict': {
+ 'id': 'qbpyqc',
+ 'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
+ 'ext': 'mp4',
+ 'title': 'Open\'er Festival 2016: najdziwniejsze wymagania gwiazd',
+ 'description': 'Trzy samochody, których nigdy nie użyto, prywatne spa, hotel dekorowany czarnym suknem czy nielegalne używki. Organizatorzy koncertów i festiwali muszą stawać przed nie lada wyzwaniem zapraszając gwia...',
+ 'upload_date': '20160705',
+ 'timestamp': 1467721580,
+ },
+ }, {
+ 'url': 'https://onet100.vod.pl/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id, video_id = mobj.group('display_id', 'id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ mvp_id = self._search_mvp_id(webpage)
+
+ info_dict = self._extract_from_id(mvp_id, webpage)
+ info_dict.update({
+ 'id': video_id,
+ 'display_id': display_id,
+ })
+
+ return info_dict
+
+
+class OnetChannelIE(OnetBaseIE):
+ _VALID_URL = OnetBaseIE._URL_BASE_RE + r'(?P<id>[a-z]+)(?:[?#]|$)'
+ IE_NAME = 'onet.tv:channel'
+
+ _TESTS = [{
+ 'url': 'http://onet.tv/k/openerfestival',
+ 'info_dict': {
+ 'id': 'openerfestival',
+ 'title': "Open'er Festival",
+ 'description': "Tak było na Open'er Festival 2016! Oglądaj nasze reportaże i wywiady z artystami.",
+ },
+ 'playlist_mincount': 35,
+ }, {
+ 'url': 'https://onet100.vod.pl/k/openerfestival',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, channel_id)
+
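+ # Join JS string concatenations ('a' + 'b') so js_to_json can parse the object literal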
+ current_clip_info = self._parse_json(self._search_regex(
+ r'var\s+currentClip\s*=\s*({[^}]+})', webpage, 'video info'), channel_id,
+ transform_source=lambda s: js_to_json(re.sub(r'\'\s*\+\s*\'', '', s)))
+ video_id = remove_start(current_clip_info['ckmId'], 'mvp:')
+ video_name = url_basename(current_clip_info['url'])
+
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen(
+ 'Downloading just video %s because of --no-playlist' % video_name)
+ return self._extract_from_id(video_id, webpage)
+
+ self.to_screen(
+ 'Downloading channel %s - add --no-playlist to just download video %s' % (
+ channel_id, video_name))
+ matches = re.findall(
+ r'<a[^>]+href=[\'"](%s[a-z]+/[0-9a-z-]+/[0-9a-z]+)' % self._URL_BASE_RE,
+ webpage)
+ entries = [
+ self.url_result(video_link, OnetIE.ie_key())
+ for video_link in matches]
+
+ channel_title = strip_or_none(get_element_by_class('o_channelName', webpage))
+ channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage))
+ return self.playlist_result(entries, channel_id, channel_title, channel_description)
+
+
+class OnetPlIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)'
+ IE_NAME = 'onet.pl'
+
+ _TESTS = [{
+ 'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly',
+ 'md5': 'b94021eb56214c3969380388b6e73cb0',
+ 'info_dict': {
+ 'id': '1561707.1685479',
+ 'ext': 'mp4',
+ 'title': 'Ziobro wygrał kwalifikacje w Pjongczangu',
+ 'description': 'md5:61fb0740084d2d702ea96512a03585b4',
+ 'upload_date': '20170214',
+ 'timestamp': 1487078046,
+ },
+ }, {
+ # embedded via pulsembed
+ 'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
+ 'info_dict': {
+ 'id': '501235.965429946',
+ 'ext': 'mp4',
+ 'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
+ 'upload_date': '20170622',
+ 'timestamp': 1498159955,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89',
+ 'only_matching': True,
+ }]
+
+ def _search_mvp_id(self, webpage, default=NO_DEFAULT):
+ return self._search_regex(
+ r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
+ default=default)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ mvp_id = self._search_mvp_id(webpage, default=None)
+
+ if not mvp_id:
+ pulsembed_url = self._search_regex(
+ r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
+ webpage, 'pulsembed url', group='url')
+ webpage = self._download_webpage(
+ pulsembed_url, video_id, 'Downloading pulsembed webpage')
+ mvp_id = self._search_mvp_id(webpage)
+
+ return self.url_result(
+ 'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
diff --git a/hypervideo_dl/extractor/onionstudios.py b/hypervideo_dl/extractor/onionstudios.py
new file mode 100644
index 0000000..cf5c39e
--- /dev/null
+++ b/hypervideo_dl/extractor/onionstudios.py
@@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import js_to_json
+
+
+class OnionStudiosIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'
+
+ _TESTS = [{
+ 'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
+ 'md5': '5a118d466d62b5cd03647cf2c593977f',
+ 'info_dict': {
+ 'id': '3459881',
+ 'ext': 'mp4',
+ 'title': 'Hannibal charges forward, stops for a cocktail',
+ 'description': 'md5:545299bda6abf87e5ec666548c6a9448',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'a.v. club',
+ 'upload_date': '20150619',
+ 'timestamp': 1434728546,
+ },
+ }, {
+ 'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.onionstudios.com/video/6139.json',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
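+ # The embed script defines window.mcpMapping, which maps Onion video ids to Kinja MCP ids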
+ webpage = self._download_webpage(
+ 'http://onionstudios.com/embed/dc94dc2899fe644c0e7241fa04c1b732.js',
+ video_id)
+ mcp_id = compat_str(self._parse_json(self._search_regex(
+ r'window\.mcpMapping\s*=\s*({.+?});', webpage,
+ 'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id'])
+ return self.url_result(
+ 'http://kinja.com/ajax/inset/iframe?id=mcp-' + mcp_id,
+ 'KinjaEmbed', mcp_id)
diff --git a/hypervideo_dl/extractor/ooyala.py b/hypervideo_dl/extractor/ooyala.py
new file mode 100644
index 0000000..eb957b8
--- /dev/null
+++ b/hypervideo_dl/extractor/ooyala.py
@@ -0,0 +1,210 @@
+from __future__ import unicode_literals
+
+import base64
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_str,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ try_get,
+ unsmuggle_url,
+)
+
+
+class OoyalaBaseIE(InfoExtractor):
+ _PLAYER_BASE = 'http://player.ooyala.com/'
+ _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
+ _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s'
+
+ def _extract(self, content_tree_url, video_id, domain=None, supportedformats=None, embed_token=None):
+ content_tree = self._download_json(content_tree_url, video_id)['content_tree']
+ metadata = content_tree[list(content_tree)[0]]
+ embed_code = metadata['embed_code']
+ pcode = metadata.get('asset_pcode') or embed_code
+ title = metadata['title']
+
+ auth_data = self._download_json(
+ self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code),
+ video_id, headers=self.geo_verification_headers(), query={
+ 'domain': domain or 'player.ooyala.com',
+ 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth',
+ 'embedToken': embed_token,
+ })['authorization_data'][embed_code]
+
+ urls = []
+ formats = []
+ streams = auth_data.get('streams') or [{
+ 'delivery_type': 'hls',
+ 'url': {
+ 'data': base64.b64encode(('http://player.ooyala.com/hls/player/all/%s.m3u8' % embed_code).encode()).decode(),
+ }
+ }]
+ for stream in streams:
+ url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
+ if not url_data:
+ continue
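+ # Stream URLs are delivered base64-encoded in url.data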
+ s_url = compat_b64decode(url_data).decode('utf-8')
+ if not s_url or s_url in urls:
+ continue
+ urls.append(s_url)
+ ext = determine_ext(s_url, None)
+ delivery_type = stream.get('delivery_type')
+ if delivery_type == 'hls' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif delivery_type == 'hds' or ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
+ elif delivery_type == 'dash' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ s_url, embed_code, mpd_id='dash', fatal=False))
+ elif delivery_type == 'smooth':
+ formats.extend(self._extract_ism_formats(
+ s_url, embed_code, ism_id='mss', fatal=False))
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ s_url, embed_code, fatal=False))
+ else:
+ formats.append({
+ 'url': s_url,
+ 'ext': ext or delivery_type,
+ 'vcodec': stream.get('video_codec'),
+ 'format_id': delivery_type,
+ 'width': int_or_none(stream.get('width')),
+ 'height': int_or_none(stream.get('height')),
+ 'abr': int_or_none(stream.get('audio_bitrate')),
+ 'vbr': int_or_none(stream.get('video_bitrate')),
+ 'fps': float_or_none(stream.get('framerate')),
+ })
+ if not formats and not auth_data.get('authorized'):
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, auth_data['message']), expected=True)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ subtitles[lang] = [{
+ 'url': sub_url,
+ }]
+
+ return {
+ 'id': embed_code,
+ 'title': title,
+ 'description': metadata.get('description'),
+ 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
+ 'duration': float_or_none(metadata.get('duration'), 1000),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
+
+
+class OoyalaIE(OoyalaBaseIE):
+ _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(?:&|$)'
+
+ _TESTS = [
+ {
+ # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
+ 'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+ 'info_dict': {
+ 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+ 'ext': 'mp4',
+ 'title': 'Explaining Data Recovery from Hard Drives and SSDs',
+ 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+ 'duration': 853.386,
+ },
+ # The video in the original webpage now uses PlayWire
+ 'skip': 'Ooyala said: movie expired',
+ }, {
+ # Only available for iPad
+ 'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
+ 'info_dict': {
+ 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
+ 'ext': 'mp4',
+ 'title': 'Simulation Overview - Levels of Simulation',
+ 'duration': 194.948,
+ },
+ },
+ {
+ # Information available only through SAS api
+ # From http://community.plm.automation.siemens.com/t5/News-NX-Manufacturing/Tool-Path-Divide/ba-p/4187
+ 'url': 'http://player.ooyala.com/player.js?embedCode=FiOG81ZTrvckcchQxmalf4aQj590qTEx',
+ 'md5': 'a84001441b35ea492bc03736e59e7935',
+ 'info_dict': {
+ 'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
+ 'ext': 'mp4',
+ 'title': 'Divide Tool Path.mp4',
+ 'duration': 204.405,
+ }
+ },
+ {
+ # empty stream['url']['data']
+ 'url': 'http://player.ooyala.com/player.js?embedCode=w2bnZtYjE6axZ_dw1Cd0hQtXd_ige2Is',
+ 'only_matching': True,
+ }
+ ]
+
+ @staticmethod
+ def _url_for_embed_code(embed_code):
+ return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
+
+ @classmethod
+ def _build_url_result(cls, embed_code):
+ return cls.url_result(cls._url_for_embed_code(embed_code),
+ ie=cls.ie_key())
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ embed_code = self._match_id(url)
+ domain = smuggled_data.get('domain')
+ supportedformats = smuggled_data.get('supportedformats')
+ embed_token = smuggled_data.get('embed_token')
+ content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
+ return self._extract(content_tree_url, embed_code, domain, supportedformats, embed_token)
+
+
+class OoyalaExternalIE(OoyalaBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ ooyalaexternal:|
+ https?://.+?\.ooyala\.com/.*?\bexternalId=
+ )
+ (?P<partner_id>[^:]+)
+ :
+ (?P<id>.+)
+ (?:
+ :|
+ .*?&pcode=
+ )
+ (?P<pcode>.+?)
+ (?:&|$)
+ '''
+
+ _TEST = {
+ 'url': 'https://player.ooyala.com/player.js?externalId=espn:10365079&pcode=1kNG061cgaoolOncv54OAO1ceO-I&adSetCode=91cDU6NuXTGKz3OdjOxFdAgJVtQcKJnI&callback=handleEvents&hasModuleParams=1&height=968&playerBrandingId=7af3bd04449c444c964f347f11873075&targetReplaceId=videoPlayer&width=1656&wmode=opaque&allowScriptAccess=always',
+ 'info_dict': {
+ 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
+ 'ext': 'mp4',
+ 'title': 'dm_140128_30for30Shorts___JudgingJewellv2',
+ 'duration': 1302.0,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups()
+ content_tree_url = self._CONTENT_TREE_BASE + 'external_id/%s/%s:%s' % (pcode, partner_id, video_id)
+ return self._extract(content_tree_url, video_id)
diff --git a/hypervideo_dl/extractor/openload.py b/hypervideo_dl/extractor/openload.py
new file mode 100644
index 0000000..0c20d01
--- /dev/null
+++ b/hypervideo_dl/extractor/openload.py
@@ -0,0 +1,238 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import os
+import subprocess
+import tempfile
+
+from ..compat import (
+ compat_urlparse,
+ compat_kwargs,
+)
+from ..utils import (
+ check_executable,
+ encodeArgument,
+ ExtractorError,
+ get_exe_version,
+ is_outdated_version,
+ std_headers,
+)
+
+
+def cookie_to_dict(cookie):
+ cookie_dict = {
+ 'name': cookie.name,
+ 'value': cookie.value,
+ }
+ if cookie.port_specified:
+ cookie_dict['port'] = cookie.port
+ if cookie.domain_specified:
+ cookie_dict['domain'] = cookie.domain
+ if cookie.path_specified:
+ cookie_dict['path'] = cookie.path
+ if cookie.expires is not None:
+ cookie_dict['expires'] = cookie.expires
+ if cookie.secure is not None:
+ cookie_dict['secure'] = cookie.secure
+ if cookie.discard is not None:
+ cookie_dict['discard'] = cookie.discard
+ try:
+ if (cookie.has_nonstandard_attr('httpOnly')
+ or cookie.has_nonstandard_attr('httponly')
+ or cookie.has_nonstandard_attr('HttpOnly')):
+ cookie_dict['httponly'] = True
+ except TypeError:
+ pass
+ return cookie_dict
+
+
+def cookie_jar_to_list(cookie_jar):
+ return [cookie_to_dict(cookie) for cookie in cookie_jar]
+
+
+class PhantomJSwrapper(object):
+ """PhantomJS wrapper class
+
+ This class is experimental.
+ """
+
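+ # Template flow: restore cookies, open a blank page, inject the saved HTML
+ # via setContent, run {jscode}, then saveAndExit() writes the rendered HTML
+ # and cookies back to disk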
+ _TEMPLATE = r'''
+ phantom.onError = function(msg, trace) {{
+ var msgStack = ['PHANTOM ERROR: ' + msg];
+ if(trace && trace.length) {{
+ msgStack.push('TRACE:');
+ trace.forEach(function(t) {{
+ msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
+ + (t.function ? ' (in function ' + t.function +')' : ''));
+ }});
+ }}
+ console.error(msgStack.join('\n'));
+ phantom.exit(1);
+ }};
+ var page = require('webpage').create();
+ var fs = require('fs');
+ var read = {{ mode: 'r', charset: 'utf-8' }};
+ var write = {{ mode: 'w', charset: 'utf-8' }};
+ JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
+ phantom.addCookie(x);
+ }});
+ page.settings.resourceTimeout = {timeout};
+ page.settings.userAgent = "{ua}";
+ page.onLoadStarted = function() {{
+ page.evaluate(function() {{
+ delete window._phantom;
+ delete window.callPhantom;
+ }});
+ }};
+ var saveAndExit = function() {{
+ fs.write("{html}", page.content, write);
+ fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
+ phantom.exit();
+ }};
+ page.onLoadFinished = function(status) {{
+ if(page.url === "") {{
+ page.setContent(fs.read("{html}", read), "{url}");
+ }}
+ else {{
+ {jscode}
+ }}
+ }};
+ page.open("");
+ '''
+
+ _TMP_FILE_NAMES = ['script', 'html', 'cookies']
+
+ @staticmethod
+ def _version():
+ return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
+
+ def __init__(self, extractor, required_version=None, timeout=10000):
+ self._TMP_FILES = {}
+
+ self.exe = check_executable('phantomjs', ['-v'])
+ if not self.exe:
+ raise ExtractorError('PhantomJS executable not found in PATH, '
+ 'download it from http://phantomjs.org',
+ expected=True)
+
+ self.extractor = extractor
+
+ if required_version:
+ version = self._version()
+ if is_outdated_version(version, required_version):
+ self.extractor._downloader.report_warning(
+ 'Your copy of PhantomJS is outdated, update it to version '
+ '%s or newer if you encounter any errors.' % required_version)
+
+ self.options = {
+ 'timeout': timeout,
+ }
+ for name in self._TMP_FILE_NAMES:
+ tmp = tempfile.NamedTemporaryFile(delete=False)
+ tmp.close()
+ self._TMP_FILES[name] = tmp
+
+ def __del__(self):
+ for name in self._TMP_FILE_NAMES:
+ try:
+ os.remove(self._TMP_FILES[name].name)
+ except (IOError, OSError, KeyError):
+ pass
+
+ def _save_cookies(self, url):
+ cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
+ for cookie in cookies:
+ if 'path' not in cookie:
+ cookie['path'] = '/'
+ if 'domain' not in cookie:
+ cookie['domain'] = compat_urlparse.urlparse(url).netloc
+ with open(self._TMP_FILES['cookies'].name, 'wb') as f:
+ f.write(json.dumps(cookies).encode('utf-8'))
+
+ def _load_cookies(self):
+ with open(self._TMP_FILES['cookies'].name, 'rb') as f:
+ cookies = json.loads(f.read().decode('utf-8'))
+ for cookie in cookies:
+ if cookie.get('httponly') is True:
+ cookie['rest'] = {'httpOnly': None}
+ if 'expiry' in cookie:
+ cookie['expire_time'] = cookie['expiry']
+ self.extractor._set_cookie(**compat_kwargs(cookie))
+
+ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
+ """
+ Downloads webpage (if needed) and executes JS
+
+ Params:
+ url: website url
+ html: optional, html code of website
+ video_id: video id
+ note: optional, displayed when downloading webpage
+ note2: optional, displayed when executing JS
+ headers: custom http headers
+ jscode: code to be executed when page is loaded
+
+ Returns tuple with:
+ * downloaded website (after JS execution)
+ * anything you print with `console.log` (but not inside `page.evaluate`!)
+
+ In most cases you don't need to add any `jscode`.
+ It is executed in `page.onLoadFinished`.
+ `saveAndExit();` is mandatory, use it instead of `phantom.exit()`
+ It is possible to wait for some element on the webpage, for example:
+ var check = function() {
+ var elementFound = page.evaluate(function() {
+ return document.querySelector('#b.done') !== null;
+ });
+ if(elementFound)
+ saveAndExit();
+ else
+ window.setTimeout(check, 500);
+ }
+
+ page.evaluate(function(){
+ document.querySelector('#a').click();
+ });
+ check();
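+
+ A typical (hypothetical) call from an extractor looks like:
+ phantom = PhantomJSwrapper(self, required_version='2.0')
+ webpage, logs = phantom.get(url, video_id=video_id)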
+ """
+ if 'saveAndExit();' not in jscode:
+ raise ExtractorError('`saveAndExit();` not found in `jscode`')
+ if not html:
+ html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
+ with open(self._TMP_FILES['html'].name, 'wb') as f:
+ f.write(html.encode('utf-8'))
+
+ self._save_cookies(url)
+
+ replaces = self.options
+ replaces['url'] = url
+ user_agent = headers.get('User-Agent') or std_headers['User-Agent']
+ replaces['ua'] = user_agent.replace('"', '\\"')
+ replaces['jscode'] = jscode
+
+ for x in self._TMP_FILE_NAMES:
+ replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
+
+ with open(self._TMP_FILES['script'].name, 'wb') as f:
+ f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
+
+ if video_id is None:
+ self.extractor.to_screen('%s' % (note2,))
+ else:
+ self.extractor.to_screen('%s: %s' % (video_id, note2))
+
+ p = subprocess.Popen([
+ self.exe, '--ssl-protocol=any',
+ self._TMP_FILES['script'].name
+ ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out, err = p.communicate()
+ if p.returncode != 0:
+ raise ExtractorError(
+ 'Executing JS failed:\n' + encodeArgument(err))
+ with open(self._TMP_FILES['html'].name, 'rb') as f:
+ html = f.read().decode('utf-8')
+
+ self._load_cookies()
+
+ return (html, encodeArgument(out))
diff --git a/hypervideo_dl/extractor/ora.py b/hypervideo_dl/extractor/ora.py
new file mode 100644
index 0000000..1d42be3
--- /dev/null
+++ b/hypervideo_dl/extractor/ora.py
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ get_element_by_attribute,
+ qualities,
+ unescapeHTML,
+)
+
+
+class OraTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq',
+ 'md5': 'fa33717591c631ec93b04b0e330df786',
+ 'info_dict': {
+ 'id': '50178',
+ 'ext': 'mp4',
+ 'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!',
+ 'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1',
+ }
+ }, {
+ 'url': 'http://www.unsafespeech.com/video/2016/5/10/student-self-censorship-and-the-thought-police-on-university-campuses-0_6622bnkppw4d',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ video_data = self._search_regex(
+ r'"(?:video|current)"\s*:\s*({[^}]+?})', webpage, 'current video')
+ m3u8_url = self._search_regex(
+ r'hls_stream"?\s*:\s*"([^"]+)', video_data, 'm3u8 url', None)
+ if m3u8_url:
+ formats = self._extract_m3u8_formats(
+ m3u8_url, display_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ # similar to GameSpotIE
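+ # (the m3u8 path embeds a comma-separated quality list; each quality
+ # maps to a progressive MP4 on the ora.tv CDN)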
+ m3u8_path = compat_urlparse.urlparse(m3u8_url).path
+ QUALITIES_RE = r'((,[a-z]+\d+)+,?)'
+ available_qualities = self._search_regex(
+ QUALITIES_RE, m3u8_path, 'qualities').strip(',').split(',')
+ http_path = m3u8_path[1:].split('/', 1)[1]
+ http_template = re.sub(QUALITIES_RE, r'%s', http_path)
+ http_template = http_template.replace('.csmil/master.m3u8', '')
+ http_template = compat_urlparse.urljoin(
+ 'http://videocdn-pmd.ora.tv/', http_template)
+ preference = qualities(
+ ['mobile400', 'basic400', 'basic600', 'sd900', 'sd1200', 'sd1500', 'hd720', 'hd1080'])
+ for q in available_qualities:
+ formats.append({
+ 'url': http_template % q,
+ 'format_id': q,
+ 'preference': preference(q),
+ })
+ self._sort_formats(formats)
+ else:
+ return self.url_result(self._search_regex(
+ r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube id'), 'Youtube')
+
+ return {
+ 'id': self._search_regex(
+ r'"id"\s*:\s*(\d+)', video_data, 'video id', default=display_id),
+ 'display_id': display_id,
+ 'title': unescapeHTML(self._og_search_title(webpage)),
+ 'description': get_element_by_attribute(
+ 'class', 'video_txt_decription', webpage),
+ 'thumbnail': self._proto_relative_url(self._search_regex(
+ r'"thumb"\s*:\s*"([^"]+)', video_data, 'thumbnail', None)),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/orf.py b/hypervideo_dl/extractor/orf.py
new file mode 100644
index 0000000..ed8a9a8
--- /dev/null
+++ b/hypervideo_dl/extractor/orf.py
@@ -0,0 +1,589 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ determine_ext,
+ float_or_none,
+ HEADRequest,
+ int_or_none,
+ orderedSet,
+ remove_end,
+ str_or_none,
+ strip_jsonp,
+ unescapeHTML,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class ORFTVthekIE(InfoExtractor):
+ IE_NAME = 'orf:tvthek'
+ IE_DESC = 'ORF TVthek'
+ _VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
+ 'playlist': [{
+ 'md5': '2942210346ed779588f428a92db88712',
+ 'info_dict': {
+ 'id': '8896777',
+ 'ext': 'mp4',
+ 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
+ 'description': 'md5:c1272f0245537812d4e36419c207b67d',
+ 'duration': 2668,
+ 'upload_date': '20141208',
+ },
+ }],
+ 'skip': 'Blocked outside of Austria / Germany',
+ }, {
+ 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
+ 'info_dict': {
+ 'id': '7982259',
+ 'ext': 'mp4',
+ 'title': 'Best of Ingrid Thurnher',
+ 'upload_date': '20140527',
+ 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
+ },
+ 'params': {
+ 'skip_download': True, # rtsp downloads
+ },
+ 'skip': 'Blocked outside of Austria / Germany',
+ }, {
+ 'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tvthek.orf.at/profile/Universum/35429',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
+ data_jsb = self._parse_json(
+ self._search_regex(
+ r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
+ webpage, 'playlist', group='json'),
+ playlist_id, transform_source=unescapeHTML)['playlist']['videos']
+
+ entries = []
+ for sd in data_jsb:
+ video_id, title = sd.get('id'), sd.get('title')
+ if not video_id or not title:
+ continue
+ video_id = compat_str(video_id)
+ formats = []
+ for fd in sd['sources']:
+ src = url_or_none(fd.get('src'))
+ if not src:
+ continue
+ format_id_list = []
+ for key in ('delivery', 'quality', 'quality_string'):
+ value = fd.get(key)
+ if value:
+ format_id_list.append(value)
+ format_id = '-'.join(format_id_list)
+ ext = determine_ext(src)
+ if ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ src, video_id, 'mp4', m3u8_id=format_id, fatal=False)
+ if any('/geoprotection' in f['url'] for f in m3u8_formats):
+ self.raise_geo_restricted()
+ formats.extend(m3u8_formats)
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ src, video_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'format_id': format_id,
+ 'url': src,
+ 'protocol': fd.get('protocol'),
+ })
+
+ # Check for geoblocking.
+ # There is a property is_geoprotection, but that's always false
+ geo_str = sd.get('geoprotection_string')
+ if geo_str:
+ try:
+ http_url = next(
+ f['url']
+ for f in formats
+ if re.match(r'^https?://.*\.mp4$', f['url']))
+ except StopIteration:
+ pass
+ else:
+ req = HEADRequest(http_url)
+ self._request_webpage(
+ req, video_id,
+ note='Testing for geoblocking',
+ errnote=((
+ 'This video seems to be blocked outside of %s. '
+ 'You may want to try the streaming-* formats.')
+ % geo_str),
+ fatal=False)
+
+ self._check_formats(formats, video_id)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for sub in sd.get('subtitles', []):
+ sub_src = sub.get('src')
+ if not sub_src:
+ continue
+ subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({
+ 'url': sub_src,
+ })
+
+ upload_date = unified_strdate(sd.get('created_date'))
+
+ thumbnails = []
+ preview = sd.get('preview_image_url')
+ if preview:
+ thumbnails.append({
+ 'id': 'preview',
+ 'url': preview,
+ 'preference': 0,
+ })
+ image = sd.get('image_full_url')
+ if not image and len(data_jsb) == 1:
+ image = self._og_search_thumbnail(webpage)
+ if image:
+ thumbnails.append({
+ 'id': 'full',
+ 'url': image,
+ 'preference': 1,
+ })
+
+ entries.append({
+ '_type': 'video',
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': sd.get('description'),
+ 'duration': int_or_none(sd.get('duration_in_seconds')),
+ 'upload_date': upload_date,
+ 'thumbnails': thumbnails,
+ })
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'id': playlist_id,
+ }
+
+
+class ORFRadioIE(InfoExtractor):
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ show_date = mobj.group('date')
+ show_id = mobj.group('show')
+
+ data = self._download_json(
+ 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s'
+ % (self._API_STATION, show_id, show_date), show_id)
+
+ entries = []
+ for info in data['streams']:
+ loop_stream_id = str_or_none(info.get('loopStreamId'))
+ if not loop_stream_id:
+ continue
+ title = str_or_none(data.get('title'))
+ if not title:
+ continue
+ start = int_or_none(info.get('start'), scale=1000)
+ end = int_or_none(info.get('end'), scale=1000)
+ duration = end - start if end and start else None
+ entries.append({
+ 'id': loop_stream_id.replace('.mp3', ''),
+ 'url': 'https://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
+ 'title': title,
+ 'description': clean_html(data.get('subtitle')),
+ 'duration': duration,
+ 'timestamp': start,
+ 'ext': 'mp3',
+ 'series': data.get('programTitle'),
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': show_id,
+ 'title': data.get('title'),
+ 'description': clean_html(data.get('subtitle')),
+ 'entries': entries,
+ }
+
+
+class ORFFM4IE(ORFRadioIE):
+ IE_NAME = 'orf:fm4'
+ IE_DESC = 'radio FM4'
+ _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)'
+ _API_STATION = 'fm4'
+ _LOOP_STATION = 'fm4'
+
+ _TEST = {
+ 'url': 'http://fm4.orf.at/player/20170107/4CC',
+ 'md5': '2b0be47375432a7ef104453432a19212',
+ 'info_dict': {
+ 'id': '2017-01-07_2100_tl_54_7DaysSat18_31295',
+ 'ext': 'mp3',
+ 'title': 'Solid Steel Radioshow',
+ 'description': 'Die Mixshow von Coldcut und Ninja Tune.',
+ 'duration': 3599,
+ 'timestamp': 1483819257,
+ 'upload_date': '20170107',
+ },
+ 'skip': 'Shows from ORF radios are only available for 7 days.',
+ }
+
+
+class ORFNOEIE(ORFRadioIE):
+ IE_NAME = 'orf:noe'
+ IE_DESC = 'Radio Niederösterreich'
+ _VALID_URL = r'https?://(?P<station>noe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'noe'
+ _LOOP_STATION = 'oe2n'
+
+ _TEST = {
+ 'url': 'https://noe.orf.at/player/20200423/NGM',
+ 'only_matching': True,
+ }
+
+
+class ORFWIEIE(ORFRadioIE):
+ IE_NAME = 'orf:wien'
+ IE_DESC = 'Radio Wien'
+ _VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'wie'
+ _LOOP_STATION = 'oe2w'
+
+ _TEST = {
+ 'url': 'https://wien.orf.at/player/20200423/WGUM',
+ 'only_matching': True,
+ }
+
+
+class ORFBGLIE(ORFRadioIE):
+ IE_NAME = 'orf:burgenland'
+ IE_DESC = 'Radio Burgenland'
+ _VALID_URL = r'https?://(?P<station>burgenland)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'bgl'
+ _LOOP_STATION = 'oe2b'
+
+ _TEST = {
+ 'url': 'https://burgenland.orf.at/player/20200423/BGM',
+ 'only_matching': True,
+ }
+
+
+class ORFOOEIE(ORFRadioIE):
+ IE_NAME = 'orf:oberoesterreich'
+ IE_DESC = 'Radio Oberösterreich'
+ _VALID_URL = r'https?://(?P<station>ooe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'ooe'
+ _LOOP_STATION = 'oe2o'
+
+ _TEST = {
+ 'url': 'https://ooe.orf.at/player/20200423/OGMO',
+ 'only_matching': True,
+ }
+
+
+class ORFSTMIE(ORFRadioIE):
+ IE_NAME = 'orf:steiermark'
+ IE_DESC = 'Radio Steiermark'
+ _VALID_URL = r'https?://(?P<station>steiermark)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'stm'
+ _LOOP_STATION = 'oe2st'
+
+ _TEST = {
+ 'url': 'https://steiermark.orf.at/player/20200423/STGMS',
+ 'only_matching': True,
+ }
+
+
+class ORFKTNIE(ORFRadioIE):
+ IE_NAME = 'orf:kaernten'
+ IE_DESC = 'Radio Kärnten'
+ _VALID_URL = r'https?://(?P<station>kaernten)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'ktn'
+ _LOOP_STATION = 'oe2k'
+
+ _TEST = {
+ 'url': 'https://kaernten.orf.at/player/20200423/KGUMO',
+ 'only_matching': True,
+ }
+
+
+class ORFSBGIE(ORFRadioIE):
+ IE_NAME = 'orf:salzburg'
+ IE_DESC = 'Radio Salzburg'
+ _VALID_URL = r'https?://(?P<station>salzburg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'sbg'
+ _LOOP_STATION = 'oe2s'
+
+ _TEST = {
+ 'url': 'https://salzburg.orf.at/player/20200423/SGUM',
+ 'only_matching': True,
+ }
+
+
+class ORFTIRIE(ORFRadioIE):
+ IE_NAME = 'orf:tirol'
+ IE_DESC = 'Radio Tirol'
+ _VALID_URL = r'https?://(?P<station>tirol)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'tir'
+ _LOOP_STATION = 'oe2t'
+
+ _TEST = {
+ 'url': 'https://tirol.orf.at/player/20200423/TGUMO',
+ 'only_matching': True,
+ }
+
+
+class ORFVBGIE(ORFRadioIE):
+ IE_NAME = 'orf:vorarlberg'
+ IE_DESC = 'Radio Vorarlberg'
+ _VALID_URL = r'https?://(?P<station>vorarlberg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'vbg'
+ _LOOP_STATION = 'oe2v'
+
+ _TEST = {
+ 'url': 'https://vorarlberg.orf.at/player/20200423/VGUM',
+ 'only_matching': True,
+ }
+
+
+class ORFOE3IE(ORFRadioIE):
+ IE_NAME = 'orf:oe3'
+ IE_DESC = 'Radio Österreich 3'
+ _VALID_URL = r'https?://(?P<station>oe3)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'oe3'
+ _LOOP_STATION = 'oe3'
+
+ _TEST = {
+ 'url': 'https://oe3.orf.at/player/20200424/3WEK',
+ 'only_matching': True,
+ }
+
+
+class ORFOE1IE(ORFRadioIE):
+ IE_NAME = 'orf:oe1'
+ IE_DESC = 'Radio Österreich 1'
+ _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'oe1'
+ _LOOP_STATION = 'oe1'
+
+ _TEST = {
+ 'url': 'http://oe1.orf.at/player/20170108/456544',
+ 'md5': '34d8a6e67ea888293741c86a099b745b',
+ 'info_dict': {
+ 'id': '2017-01-08_0759_tl_51_7DaysSun6_256141',
+ 'ext': 'mp3',
+ 'title': 'Morgenjournal',
+ 'duration': 609,
+ 'timestamp': 1483858796,
+ 'upload_date': '20170108',
+ },
+ 'skip': 'Shows from ORF radios are only available for 7 days.'
+ }
+
+
+class ORFIPTVIE(InfoExtractor):
+ IE_NAME = 'orf:iptv'
+ IE_DESC = 'iptv.ORF.at'
+ _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://iptv.orf.at/stories/2275236/',
+ 'md5': 'c8b22af4718a4b4af58342529453e3e5',
+ 'info_dict': {
+ 'id': '350612',
+ 'ext': 'flv',
+ 'title': 'Weitere Evakuierungen um Vulkan Calbuco',
+ 'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
+ 'duration': 68.197,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20150425',
+ },
+ }
+
+ def _real_extract(self, url):
+ story_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://iptv.orf.at/stories/%s' % story_id, story_id)
+
+ video_id = self._search_regex(
+ r'data-video(?:id)?="(\d+)"', webpage, 'video id')
+
+ data = self._download_json(
+ 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
+ video_id)[0]
+
+ duration = float_or_none(data['duration'], 1000)
+
+ video = data['sources']['default']
+ load_balancer_url = video['loadBalancerUrl']
+ abr = int_or_none(video.get('audioBitrate'))
+ vbr = int_or_none(video.get('bitrate'))
+ fps = int_or_none(video.get('videoFps'))
+ width = int_or_none(video.get('videoWidth'))
+ height = int_or_none(video.get('videoHeight'))
+ thumbnail = video.get('preview')
+
+ rendition = self._download_json(
+ load_balancer_url, video_id, transform_source=strip_jsonp)
+
+ f = {
+ 'abr': abr,
+ 'vbr': vbr,
+ 'fps': fps,
+ 'width': width,
+ 'height': height,
+ }
+
+ formats = []
+ for format_id, format_url in rendition['redirect'].items():
+ if format_id == 'rtmp':
+ ff = f.copy()
+ ff.update({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ formats.append(ff)
+ elif determine_ext(format_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id))
+ elif determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id))
+ else:
+ continue
+ self._sort_formats(formats)
+
+ title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
+ description = self._og_search_description(webpage)
+ upload_date = unified_strdate(self._html_search_meta(
+ 'dc.date', webpage, 'upload date'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
+
+
+class ORFFM4StoryIE(InfoExtractor):
+ IE_NAME = 'orf:fm4:story'
+ IE_DESC = 'fm4.orf.at stories'
+ _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://fm4.orf.at/stories/2865738/',
+ 'playlist': [{
+ 'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
+ 'info_dict': {
+ 'id': '547792',
+ 'ext': 'flv',
+ 'title': 'Manu Delago und Inner Tongue live',
+ 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
+ 'duration': 1748.52,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20170913',
+ },
+ }, {
+ 'md5': 'c6dd2179731f86f4f55a7b49899d515f',
+ 'info_dict': {
+ 'id': '547798',
+ 'ext': 'flv',
+ 'title': 'Manu Delago und Inner Tongue live (2)',
+ 'duration': 1504.08,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20170913',
+ 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
+ },
+ }],
+ }
+
+ def _real_extract(self, url):
+ story_id = self._match_id(url)
+ webpage = self._download_webpage(url, story_id)
+
+ entries = []
+ all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
+ for idx, video_id in enumerate(all_ids):
+ data = self._download_json(
+ 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
+ video_id)[0]
+
+ duration = float_or_none(data['duration'], 1000)
+
+ video = data['sources']['q8c']
+ load_balancer_url = video['loadBalancerUrl']
+ abr = int_or_none(video.get('audioBitrate'))
+ vbr = int_or_none(video.get('bitrate'))
+ fps = int_or_none(video.get('videoFps'))
+ width = int_or_none(video.get('videoWidth'))
+ height = int_or_none(video.get('videoHeight'))
+ thumbnail = video.get('preview')
+
+ rendition = self._download_json(
+ load_balancer_url, video_id, transform_source=strip_jsonp)
+
+ f = {
+ 'abr': abr,
+ 'vbr': vbr,
+ 'fps': fps,
+ 'width': width,
+ 'height': height,
+ }
+
+ formats = []
+ for format_id, format_url in rendition['redirect'].items():
+ if format_id == 'rtmp':
+ ff = f.copy()
+ ff.update({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ formats.append(ff)
+ elif determine_ext(format_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id))
+ elif determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id))
+ else:
+ continue
+ self._sort_formats(formats)
+
+ title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
+ if idx >= 1:
+ # Titles are duplicates, make them unique
+ title += ' (' + str(idx + 1) + ')'
+ description = self._og_search_description(webpage)
+ upload_date = unified_strdate(self._html_search_meta(
+ 'dc.date', webpage, 'upload date'))
+
+ entries.append({
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ })
+
+ return self.playlist_result(entries, story_id)
diff --git a/hypervideo_dl/extractor/outsidetv.py b/hypervideo_dl/extractor/outsidetv.py
new file mode 100644
index 0000000..c5333b0
--- /dev/null
+++ b/hypervideo_dl/extractor/outsidetv.py
@@ -0,0 +1,28 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class OutsideTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?outsidetv\.com/(?:[^/]+/)*?play/[a-zA-Z0-9]{8}/\d+/\d+/(?P<id>[a-zA-Z0-9]{8})'
+ _TESTS = [{
+ 'url': 'http://www.outsidetv.com/category/snow/play/ZjQYboH6/1/10/Hdg0jukV/4',
+ 'md5': '192d968fedc10b2f70ec31865ffba0da',
+ 'info_dict': {
+ 'id': 'Hdg0jukV',
+ 'ext': 'mp4',
+ 'title': 'Home - Jackson Ep 1 | Arbor Snowboards',
+ 'description': 'md5:41a12e94f3db3ca253b04bb1e8d8f4cd',
+ 'upload_date': '20181225',
+ 'timestamp': 1545742800,
+ }
+ }, {
+ 'url': 'http://www.outsidetv.com/home/play/ZjQYboH6/1/10/Hdg0jukV/4',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ jw_media_id = self._match_id(url)
+ return self.url_result(
+ 'jwplatform:' + jw_media_id, 'JWPlatform', jw_media_id)
diff --git a/hypervideo_dl/extractor/packtpub.py b/hypervideo_dl/extractor/packtpub.py
new file mode 100644
index 0000000..11ad3b3
--- /dev/null
+++ b/hypervideo_dl/extractor/packtpub.py
@@ -0,0 +1,164 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ # compat_str,
+ compat_HTTPError,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ # remove_end,
+ str_or_none,
+ strip_or_none,
+ unified_timestamp,
+ # urljoin,
+)
+
+
+class PacktPubBaseIE(InfoExtractor):
+ # _PACKT_BASE = 'https://www.packtpub.com'
+ _STATIC_PRODUCTS_BASE = 'https://static.packt-cdn.com/products/'
+
+
+class PacktPubIE(PacktPubBaseIE):
+ _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>[^/]+)/(?P<id>[^/]+)(?:/(?P<display_id>[^/?&#]+))?'
+
+ _TESTS = [{
+ 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
+ 'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
+ 'info_dict': {
+ 'id': '20530',
+ 'ext': 'mp4',
+ 'title': 'Project Intro',
+ 'thumbnail': r're:(?i)^https?://.*\.jpg',
+ 'timestamp': 1490918400,
+ 'upload_date': '20170331',
+ },
+ }, {
+ 'url': 'https://subscription.packtpub.com/video/web_development/9781787122215/20528/20530/project-intro',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://subscription.packtpub.com/video/programming/9781838988906/p1/video1_1/business-card-project',
+ 'only_matching': True,
+ }]
+ _NETRC_MACHINE = 'packtpub'
+ _TOKEN = None
+
+ def _real_initialize(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+ try:
+ self._TOKEN = self._download_json(
+ 'https://services.packtpub.com/auth-v1/users/tokens', None,
+ 'Downloading Authorization Token', data=json.dumps({
+ 'username': username,
+ 'password': password,
+ }).encode())['data']['access']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 404):
+ message = self._parse_json(e.cause.read().decode(), None)['message']
+ raise ExtractorError(message, expected=True)
+ raise
+
+ def _real_extract(self, url):
+ course_id, chapter_id, video_id, display_id = re.match(self._VALID_URL, url).groups()
+
+ headers = {}
+ if self._TOKEN:
+ headers['Authorization'] = 'Bearer ' + self._TOKEN
+ try:
+ video_url = self._download_json(
+ 'https://services.packtpub.com/products-v1/products/%s/%s/%s' % (course_id, chapter_id, video_id), video_id,
+ 'Downloading JSON video', headers=headers)['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ self.raise_login_required('This video is locked')
+ raise
+
+ # TODO: find a better way to avoid duplicating course requests
+ # metadata = self._download_json(
+ # '%s/products/%s/chapters/%s/sections/%s/metadata'
+ # % (self._MAPT_REST, course_id, chapter_id, video_id),
+ # video_id)['data']
+
+ # title = metadata['pageTitle']
+ # course_title = metadata.get('title')
+ # if course_title:
+ # title = remove_end(title, ' - %s' % course_title)
+ # timestamp = unified_timestamp(metadata.get('publicationDate'))
+ # thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath'))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': display_id or video_id, # title,
+ # 'thumbnail': thumbnail,
+ # 'timestamp': timestamp,
+ }
+
+
+class PacktPubCourseIE(PacktPubBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<id>\d+))'
+ _TESTS = [{
+ 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
+ 'info_dict': {
+ 'id': '9781787122215',
+ 'title': 'Learn Nodejs by building 12 projects [Video]',
+ 'description': 'md5:489da8d953f416e51927b60a1c7db0aa',
+ },
+ 'playlist_count': 90,
+ }, {
+ 'url': 'https://subscription.packtpub.com/video/web_development/9781787122215',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PacktPubIE.suitable(url) else super(
+ PacktPubCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ url, course_id = mobj.group('url', 'id')
+
+ course = self._download_json(
+ self._STATIC_PRODUCTS_BASE + '%s/toc' % course_id, course_id)
+ metadata = self._download_json(
+ self._STATIC_PRODUCTS_BASE + '%s/summary' % course_id,
+ course_id, fatal=False) or {}
+
+ entries = []
+ for chapter_num, chapter in enumerate(course['chapters'], 1):
+ chapter_id = str_or_none(chapter.get('id'))
+ sections = chapter.get('sections')
+ if not chapter_id or not isinstance(sections, list):
+ continue
+ chapter_info = {
+ 'chapter': chapter.get('title'),
+ 'chapter_number': chapter_num,
+ 'chapter_id': chapter_id,
+ }
+ for section in sections:
+ section_id = str_or_none(section.get('id'))
+ if not section_id or section.get('contentType') != 'video':
+ continue
+ entry = {
+ '_type': 'url_transparent',
+ 'url': '/'.join([url, chapter_id, section_id]),
+ 'title': strip_or_none(section.get('title')),
+ 'description': clean_html(section.get('summary')),
+ 'thumbnail': metadata.get('coverImage'),
+ 'timestamp': unified_timestamp(metadata.get('publicationDate')),
+ 'ie_key': PacktPubIE.ie_key(),
+ }
+ entry.update(chapter_info)
+ entries.append(entry)
+
+ return self.playlist_result(
+ entries, course_id, metadata.get('title'),
+ clean_html(metadata.get('about')))
diff --git a/hypervideo_dl/extractor/palcomp3.py b/hypervideo_dl/extractor/palcomp3.py
new file mode 100644
index 0000000..fb29d83
--- /dev/null
+++ b/hypervideo_dl/extractor/palcomp3.py
@@ -0,0 +1,148 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+
+class PalcoMP3BaseIE(InfoExtractor):
+ _GQL_QUERY_TMPL = '''{
+ artist(slug: "%s") {
+ %s
+ }
+}'''
+ _ARTIST_FIELDS_TMPL = '''music(slug: "%%s") {
+ %s
+ }'''
+ _MUSIC_FIELDS = '''duration
+ hls
+ mp3File
+ musicID
+ plays
+ title'''
+
+ def _call_api(self, artist_slug, artist_fields):
+ return self._download_json(
+ 'https://www.palcomp3.com.br/graphql/', artist_slug, query={
+ 'query': self._GQL_QUERY_TMPL % (artist_slug, artist_fields),
+ })['data']
+
+ def _parse_music(self, music):
+ music_id = compat_str(music['musicID'])
+ title = music['title']
+
+ formats = []
+ hls_url = music.get('hls')
+ if hls_url:
+ formats.append({
+ 'url': hls_url,
+ 'protocol': 'm3u8_native',
+ 'ext': 'mp4',
+ })
+ mp3_file = music.get('mp3File')
+ if mp3_file:
+ formats.append({
+ 'url': mp3_file,
+ })
+
+ return {
+ 'id': music_id,
+ 'title': title,
+ 'formats': formats,
+ 'duration': int_or_none(music.get('duration')),
+ 'view_count': int_or_none(music.get('plays')),
+ }
+
+ def _real_initialize(self):
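+ # First interpolation fills in the shared music fields; the escaped %%s
+ # survives as %s and is later filled with the music slug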
+ self._ARTIST_FIELDS_TMPL = self._ARTIST_FIELDS_TMPL % self._MUSIC_FIELDS
+
+ def _real_extract(self, url):
+ artist_slug, music_slug = re.match(self._VALID_URL, url).groups()
+ artist_fields = self._ARTIST_FIELDS_TMPL % music_slug
+ music = self._call_api(artist_slug, artist_fields)['artist']['music']
+ return self._parse_music(music)
+
+
+class PalcoMP3IE(PalcoMP3BaseIE):
+ IE_NAME = 'PalcoMP3:song'
+ _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',
+ 'md5': '99fd6405b2d8fd589670f6db1ba3b358',
+ 'info_dict': {
+ 'id': '3162927',
+ 'ext': 'mp3',
+ 'title': 'Nossas Composições - CUIDA BEM DELA',
+ 'duration': 210,
+ 'view_count': int,
+ }
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PalcoMP3VideoIE.suitable(url) else super(PalcoMP3IE, cls).suitable(url)
+
+
+class PalcoMP3ArtistIE(PalcoMP3BaseIE):
+ IE_NAME = 'PalcoMP3:artist'
+ _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.palcomp3.com.br/condedoforro/',
+ 'info_dict': {
+ 'id': '358396',
+ 'title': 'Conde do Forró',
+ },
+ 'playlist_mincount': 188,
+ }]
+ _ARTIST_FIELDS_TMPL = '''artistID
+ musics {
+ nodes {
+ %s
+ }
+ }
+ name'''
+
+ @classmethod
+ def suitable(cls, url):
+ return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ artist_slug = self._match_id(url)
+ artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
+
+ def entries():
+ for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
+ yield self._parse_music(music)
+
+ return self.playlist_result(
+ entries(), str_or_none(artist.get('artistID')), artist.get('name'))
+
+
+class PalcoMP3VideoIE(PalcoMP3BaseIE):
+ IE_NAME = 'PalcoMP3:video'
+ _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)/?#clipe'
+ _TESTS = [{
+ 'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/maiara-e-maraisa-voce-faz-falta-aqui-ao-vivo-em-vicosa-mg/#clipe',
+ 'add_ie': ['Youtube'],
+ 'info_dict': {
+ 'id': '_pD1nR2qqPg',
+ 'ext': 'mp4',
+ 'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
+ 'description': 'md5:7043342c09a224598e93546e98e49282',
+ 'upload_date': '20161107',
+ 'uploader_id': 'maiaramaraisaoficial',
+ 'uploader': 'Maiara e Maraisa',
+ }
+ }]
+ _MUSIC_FIELDS = 'youtubeID'
+
+ def _parse_music(self, music):
+ youtube_id = music['youtubeID']
+ return self.url_result(youtube_id, 'Youtube', youtube_id)
diff --git a/hypervideo_dl/extractor/pandoratv.py b/hypervideo_dl/extractor/pandoratv.py
new file mode 100644
index 0000000..538738c
--- /dev/null
+++ b/hypervideo_dl/extractor/pandoratv.py
@@ -0,0 +1,138 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ parse_duration,
+ str_to_int,
+ urlencode_postdata,
+)
+
+
+class PandoraTVIE(InfoExtractor):
+ IE_NAME = 'pandora.tv'
+ IE_DESC = '판도라TV'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?pandora\.tv/view/(?P<user_id>[^/]+)/(?P<id>\d+)| # new format
+ (?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?| # old format
+ m\.pandora\.tv/?\? # mobile
+ )
+ '''
+ _TESTS = [{
+ 'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
+ 'info_dict': {
+ 'id': '53294230',
+ 'ext': 'flv',
+ 'title': '頭を撫でてくれる?',
+ 'description': '頭を撫でてくれる?',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 39,
+ 'upload_date': '20151218',
+ 'uploader': 'カワイイ動物まとめ',
+ 'uploader_id': 'mikakim',
+ 'view_count': int,
+ 'like_count': int,
+ }
+ }, {
+ 'url': 'http://channel.pandora.tv/channel/video.ptv?ch_userid=gogoucc&prgid=54721744',
+ 'info_dict': {
+ 'id': '54721744',
+ 'ext': 'flv',
+ 'title': '[HD] JAPAN COUNTDOWN 170423',
+ 'description': '[HD] JAPAN COUNTDOWN 170423',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1704.9,
+ 'upload_date': '20170423',
+ 'uploader': 'GOGO_UCC',
+ 'uploader_id': 'gogoucc',
+ 'view_count': int,
+ 'like_count': int,
+ },
+ 'params': {
+ # Test metadata only
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.pandora.tv/view/mikakim/53294230#36797454_new',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.pandora.tv/?c=view&ch_userid=mikakim&prgid=54600346',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ user_id = mobj.group('user_id')
+ video_id = mobj.group('id')
+
+ if not user_id or not video_id:
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = qs.get('prgid', [None])[0]
+ user_id = qs.get('ch_userid', [None])[0]
+ if any(not f for f in (video_id, user_id,)):
+ raise ExtractorError('Invalid URL', expected=True)
+
+ data = self._download_json(
+ 'http://m.pandora.tv/?c=view&m=viewJsonApi&ch_userid=%s&prgid=%s'
+ % (user_id, video_id), video_id)
+
+ info = data['data']['rows']['vod_play_info']['result']
+
+ formats = []
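+        # Stream URLs live in keys such as 'v720Url' or 'v1080Url'; the
+        # digits encode the height of that rendition.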
+ for format_id, format_url in info.items():
+ if not format_url:
+ continue
+ height = self._search_regex(
+ r'^v(\d+)[Uu]rl$', format_id, 'height', default=None)
+ if not height:
+ continue
+
+ play_url = self._download_json(
+ 'http://m.pandora.tv/?c=api&m=play_url', video_id,
+ data=urlencode_postdata({
+ 'prgid': video_id,
+ 'runtime': info.get('runtime'),
+ 'vod_url': format_url,
+ }),
+ headers={
+ 'Origin': url,
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ format_url = play_url.get('url')
+ if not format_url:
+ continue
+
+ formats.append({
+ 'format_id': '%sp' % height,
+ 'url': format_url,
+ 'height': int(height),
+ })
+ self._sort_formats(formats)
+
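+        # 'fid' appears to be a storage path whose final component starts
+        # with a YYYYMMDD upload date stamp; anything else yields None.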
+ return {
+ 'id': video_id,
+ 'title': info['subject'],
+ 'description': info.get('body'),
+ 'thumbnail': info.get('thumbnail') or info.get('poster'),
+ 'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')),
+ 'upload_date': info['fid'].split('/')[-1][:8] if isinstance(info.get('fid'), compat_str) else None,
+ 'uploader': info.get('nickname'),
+ 'uploader_id': info.get('upload_userid'),
+ 'view_count': str_to_int(info.get('hit')),
+ 'like_count': str_to_int(info.get('likecnt')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/parliamentliveuk.py b/hypervideo_dl/extractor/parliamentliveuk.py
new file mode 100644
index 0000000..bdd5ff5
--- /dev/null
+++ b/hypervideo_dl/extractor/parliamentliveuk.py
@@ -0,0 +1,45 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class ParliamentLiveUKIE(InfoExtractor):
+ IE_NAME = 'parliamentlive.tv'
+ IE_DESC = 'UK parliament videos'
+ _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+
+ _TESTS = [{
+ 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
+ 'info_dict': {
+ 'id': '1_af9nv9ym',
+ 'ext': 'mp4',
+ 'title': 'Home Affairs Committee',
+ 'uploader_id': 'FFMPEG-01',
+ 'timestamp': 1422696664,
+ 'upload_date': '20150131',
+ },
+ }, {
+ 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id)
+ widget_config = self._parse_json(self._search_regex(
+ r'(?s)kWidgetConfig\s*=\s*({.+});',
+ webpage, 'kaltura widget config'), video_id)
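+        # 'wid' is the Kaltura partner ID with a one-character prefix
+        # (an underscore) that the kaltura: URL scheme does not use.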
+ kaltura_url = 'kaltura:%s:%s' % (
+ widget_config['wid'][1:], widget_config['entry_id'])
+ event_title = self._download_json(
+ 'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title']
+ return {
+ '_type': 'url_transparent',
+ 'title': event_title,
+ 'description': '',
+ 'url': kaltura_url,
+ 'ie_key': 'Kaltura',
+ }
diff --git a/hypervideo_dl/extractor/patreon.py b/hypervideo_dl/extractor/patreon.py
new file mode 100644
index 0000000..761a4b1
--- /dev/null
+++ b/hypervideo_dl/extractor/patreon.py
@@ -0,0 +1,160 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ int_or_none,
+ KNOWN_EXTENSIONS,
+ mimetype2ext,
+ parse_iso8601,
+ str_or_none,
+ try_get,
+)
+
+
+class PatreonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.patreon.com/creation?hid=743933',
+ 'md5': 'e25505eec1053a6e6813b8ed369875cc',
+ 'info_dict': {
+ 'id': '743933',
+ 'ext': 'mp3',
+ 'title': 'Episode 166: David Smalley of Dogma Debate',
+ 'description': 'md5:713b08b772cd6271b9f3906683cfacdf',
+ 'uploader': 'Cognitive Dissonance Podcast',
+ 'thumbnail': 're:^https?://.*$',
+ 'timestamp': 1406473987,
+ 'upload_date': '20140727',
+ 'uploader_id': '87145',
+ },
+ }, {
+ 'url': 'http://www.patreon.com/creation?hid=754133',
+ 'md5': '3eb09345bf44bf60451b8b0b81759d0a',
+ 'info_dict': {
+ 'id': '754133',
+ 'ext': 'mp3',
+ 'title': 'CD 167 Extra',
+ 'uploader': 'Cognitive Dissonance Podcast',
+ 'thumbnail': 're:^https?://.*$',
+ },
+ 'skip': 'Patron-only content',
+ }, {
+ 'url': 'https://www.patreon.com/creation?hid=1682498',
+ 'info_dict': {
+ 'id': 'SU4fj_aEMVw',
+ 'ext': 'mp4',
+ 'title': 'I\'m on Patreon!',
+ 'uploader': 'TraciJHines',
+ 'thumbnail': 're:^https?://.*$',
+ 'upload_date': '20150211',
+ 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
+ 'uploader_id': 'TraciJHines',
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.patreon.com/posts/episode-166-of-743933',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.patreon.com/posts/743933',
+ 'only_matching': True,
+ }]
+
+    # Currently Patreon exposes the download URL via hidden CSS, so login is
+    # not needed. Keeping this commented out for when this inevitably changes.
+ '''
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_form = {
+ 'redirectUrl': 'http://www.patreon.com/',
+ 'email': username,
+ 'password': password,
+ }
+
+ request = sanitized_Request(
+ 'https://www.patreon.com/processLogin',
+ compat_urllib_parse_urlencode(login_form).encode('utf-8')
+ )
+ login_page = self._download_webpage(request, None, note='Logging in')
+
+ if re.search(r'onLoginFailed', login_page):
+ raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
+
+ def _real_initialize(self):
+ self._login()
+ '''
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ post = self._download_json(
+ 'https://www.patreon.com/api/posts/' + video_id, video_id, query={
+ 'fields[media]': 'download_url,mimetype,size_bytes',
+ 'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title',
+ 'fields[user]': 'full_name,url',
+ 'json-api-use-default-includes': 'false',
+ 'include': 'media,user',
+ })
+ attributes = post['data']['attributes']
+ title = attributes['title'].strip()
+ image = attributes.get('image') or {}
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'description': clean_html(attributes.get('content')),
+ 'thumbnail': image.get('large_url') or image.get('url'),
+ 'timestamp': parse_iso8601(attributes.get('published_at')),
+ 'like_count': int_or_none(attributes.get('like_count')),
+ 'comment_count': int_or_none(attributes.get('comment_count')),
+ }
+
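+        # The response follows the JSON:API convention: the related media and
+        # user objects requested above arrive in a top-level 'included' array.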
+ for i in post.get('included', []):
+ i_type = i.get('type')
+ if i_type == 'media':
+ media_attributes = i.get('attributes') or {}
+ download_url = media_attributes.get('download_url')
+ ext = mimetype2ext(media_attributes.get('mimetype'))
+ if download_url and ext in KNOWN_EXTENSIONS:
+ info.update({
+ 'ext': ext,
+ 'filesize': int_or_none(media_attributes.get('size_bytes')),
+ 'url': download_url,
+ })
+ elif i_type == 'user':
+ user_attributes = i.get('attributes')
+ if user_attributes:
+ info.update({
+ 'uploader': user_attributes.get('full_name'),
+ 'uploader_id': str_or_none(i.get('id')),
+ 'uploader_url': user_attributes.get('url'),
+ })
+
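+        # Fallback order: direct media download URL, then an embedded player
+        # URL, then the file attached to the post.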
+ if not info.get('url'):
+ embed_url = try_get(attributes, lambda x: x['embed']['url'])
+ if embed_url:
+ info.update({
+ '_type': 'url',
+ 'url': embed_url,
+ })
+
+ if not info.get('url'):
+ post_file = attributes['post_file']
+ ext = determine_ext(post_file.get('name'))
+ if ext in KNOWN_EXTENSIONS:
+ info.update({
+ 'ext': ext,
+ 'url': post_file['url'],
+ })
+
+ return info
diff --git a/hypervideo_dl/extractor/pbs.py b/hypervideo_dl/extractor/pbs.py
new file mode 100644
index 0000000..d4baa16
--- /dev/null
+++ b/hypervideo_dl/extractor/pbs.py
@@ -0,0 +1,716 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ int_or_none,
+ float_or_none,
+ js_to_json,
+ orderedSet,
+ strip_jsonp,
+ strip_or_none,
+ unified_strdate,
+ url_or_none,
+ US_RATINGS,
+)
+
+
+class PBSIE(InfoExtractor):
+ _STATIONS = (
+ (r'(?:video|www|player)\.pbs\.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
+ (r'video\.aptv\.org', 'APT - Alabama Public Television (WBIQ)'), # http://aptv.org/
+ (r'video\.gpb\.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # http://www.gpb.org/
+ (r'video\.mpbonline\.org', 'Mississippi Public Broadcasting (WMPN)'), # http://www.mpbonline.org
+ (r'video\.wnpt\.org', 'Nashville Public Television (WNPT)'), # http://www.wnpt.org
+ (r'video\.wfsu\.org', 'WFSU-TV (WFSU)'), # http://wfsu.org/
+ (r'video\.wsre\.org', 'WSRE (WSRE)'), # http://www.wsre.org
+ (r'video\.wtcitv\.org', 'WTCI (WTCI)'), # http://www.wtcitv.org
+ (r'video\.pba\.org', 'WPBA/Channel 30 (WPBA)'), # http://pba.org/
+ (r'video\.alaskapublic\.org', 'Alaska Public Media (KAKM)'), # http://alaskapublic.org/kakm
+ # (r'kuac\.org', 'KUAC (KUAC)'), # http://kuac.org/kuac-tv/
+ # (r'ktoo\.org', '360 North (KTOO)'), # http://www.ktoo.org/
+ # (r'azpm\.org', 'KUAT 6 (KUAT)'), # http://www.azpm.org/
+ (r'video\.azpbs\.org', 'Arizona PBS (KAET)'), # http://www.azpbs.org
+ (r'portal\.knme\.org', 'KNME-TV/Channel 5 (KNME)'), # http://www.newmexicopbs.org/
+ (r'video\.vegaspbs\.org', 'Vegas PBS (KLVX)'), # http://vegaspbs.org/
+ (r'watch\.aetn\.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # http://www.aetn.org/
+ (r'video\.ket\.org', 'KET (WKLE)'), # http://www.ket.org/
+ (r'video\.wkno\.org', 'WKNO/Channel 10 (WKNO)'), # http://www.wkno.org/
+ (r'video\.lpb\.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'), # http://www.lpb.org/
+ (r'videos\.oeta\.tv', 'OETA (KETA)'), # http://www.oeta.tv
+ (r'video\.optv\.org', 'Ozarks Public Television (KOZK)'), # http://www.optv.org/
+ (r'watch\.wsiu\.org', 'WSIU Public Broadcasting (WSIU)'), # http://www.wsiu.org/
+ (r'video\.keet\.org', 'KEET TV (KEET)'), # http://www.keet.org
+ (r'pbs\.kixe\.org', 'KIXE/Channel 9 (KIXE)'), # http://kixe.org/
+ (r'video\.kpbs\.org', 'KPBS San Diego (KPBS)'), # http://www.kpbs.org/
+ (r'video\.kqed\.org', 'KQED (KQED)'), # http://www.kqed.org
+ (r'vids\.kvie\.org', 'KVIE Public Television (KVIE)'), # http://www.kvie.org
+ (r'video\.pbssocal\.org', 'PBS SoCal/KOCE (KOCE)'), # http://www.pbssocal.org/
+ (r'video\.valleypbs\.org', 'ValleyPBS (KVPT)'), # http://www.valleypbs.org/
+ (r'video\.cptv\.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # http://cptv.org
+ (r'watch\.knpb\.org', 'KNPB Channel 5 (KNPB)'), # http://www.knpb.org/
+ (r'video\.soptv\.org', 'SOPTV (KSYS)'), # http://www.soptv.org
+ # (r'klcs\.org', 'KLCS/Channel 58 (KLCS)'), # http://www.klcs.org
+ # (r'krcb\.org', 'KRCB Television & Radio (KRCB)'), # http://www.krcb.org
+ # (r'kvcr\.org', 'KVCR TV/DT/FM :: Vision for the Future (KVCR)'), # http://kvcr.org
+ (r'video\.rmpbs\.org', 'Rocky Mountain PBS (KRMA)'), # http://www.rmpbs.org
+ (r'video\.kenw\.org', 'KENW-TV3 (KENW)'), # http://www.kenw.org
+ (r'video\.kued\.org', 'KUED Channel 7 (KUED)'), # http://www.kued.org
+ (r'video\.wyomingpbs\.org', 'Wyoming PBS (KCWC)'), # http://www.wyomingpbs.org
+ (r'video\.cpt12\.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # http://www.cpt12.org/
+ (r'video\.kbyueleven\.org', 'KBYU-TV (KBYU)'), # http://www.kbyutv.org/
+ (r'video\.thirteen\.org', 'Thirteen/WNET New York (WNET)'), # http://www.thirteen.org
+ (r'video\.wgbh\.org', 'WGBH/Channel 2 (WGBH)'), # http://wgbh.org
+ (r'video\.wgby\.org', 'WGBY (WGBY)'), # http://www.wgby.org
+ (r'watch\.njtvonline\.org', 'NJTV Public Media NJ (WNJT)'), # http://www.njtvonline.org/
+ # (r'ripbs\.org', 'Rhode Island PBS (WSBE)'), # http://www.ripbs.org/home/
+ (r'watch\.wliw\.org', 'WLIW21 (WLIW)'), # http://www.wliw.org/
+ (r'video\.mpt\.tv', 'mpt/Maryland Public Television (WMPB)'), # http://www.mpt.org
+ (r'watch\.weta\.org', 'WETA Television and Radio (WETA)'), # http://www.weta.org
+ (r'video\.whyy\.org', 'WHYY (WHYY)'), # http://www.whyy.org
+ (r'video\.wlvt\.org', 'PBS 39 (WLVT)'), # http://www.wlvt.org/
+ (r'video\.wvpt\.net', 'WVPT - Your Source for PBS and More! (WVPT)'), # http://www.wvpt.net
+ (r'video\.whut\.org', 'Howard University Television (WHUT)'), # http://www.whut.org
+ (r'video\.wedu\.org', 'WEDU PBS (WEDU)'), # http://www.wedu.org
+ (r'video\.wgcu\.org', 'WGCU Public Media (WGCU)'), # http://www.wgcu.org/
+ # (r'wjct\.org', 'WJCT Public Broadcasting (WJCT)'), # http://www.wjct.org
+ (r'video\.wpbt2\.org', 'WPBT2 (WPBT)'), # http://www.wpbt2.org
+ (r'video\.wucftv\.org', 'WUCF TV (WUCF)'), # http://wucftv.org
+ (r'video\.wuft\.org', 'WUFT/Channel 5 (WUFT)'), # http://www.wuft.org
+ (r'watch\.wxel\.org', 'WXEL/Channel 42 (WXEL)'), # http://www.wxel.org/home/
+ (r'video\.wlrn\.org', 'WLRN/Channel 17 (WLRN)'), # http://www.wlrn.org/
+ (r'video\.wusf\.usf\.edu', 'WUSF Public Broadcasting (WUSF)'), # http://wusf.org/
+ (r'video\.scetv\.org', 'ETV (WRLK)'), # http://www.scetv.org
+ (r'video\.unctv\.org', 'UNC-TV (WUNC)'), # http://www.unctv.org/
+ # (r'pbsguam\.org', 'PBS Guam (KGTF)'), # http://www.pbsguam.org/
+ (r'video\.pbshawaii\.org', 'PBS Hawaii - Oceanic Cable Channel 10 (KHET)'), # http://www.pbshawaii.org/
+ (r'video\.idahoptv\.org', 'Idaho Public Television (KAID)'), # http://idahoptv.org
+ (r'video\.ksps\.org', 'KSPS (KSPS)'), # http://www.ksps.org/home/
+ (r'watch\.opb\.org', 'OPB (KOPB)'), # http://www.opb.org
+ (r'watch\.nwptv\.org', 'KWSU/Channel 10 & KTNW/Channel 31 (KWSU)'), # http://www.kwsu.org
+ (r'video\.will\.illinois\.edu', 'WILL-TV (WILL)'), # http://will.illinois.edu/
+ (r'video\.networkknowledge\.tv', 'Network Knowledge - WSEC/Springfield (WSEC)'), # http://www.wsec.tv
+ (r'video\.wttw\.com', 'WTTW11 (WTTW)'), # http://www.wttw.com/
+ # (r'wtvp\.org', 'WTVP & WTVP.org, Public Media for Central Illinois (WTVP)'), # http://www.wtvp.org/
+ (r'video\.iptv\.org', 'Iowa Public Television/IPTV (KDIN)'), # http://www.iptv.org/
+ (r'video\.ninenet\.org', 'Nine Network (KETC)'), # http://www.ninenet.org
+ (r'video\.wfwa\.org', 'PBS39 Fort Wayne (WFWA)'), # http://wfwa.org/
+ (r'video\.wfyi\.org', 'WFYI Indianapolis (WFYI)'), # http://www.wfyi.org
+ (r'video\.mptv\.org', 'Milwaukee Public Television (WMVS)'), # http://www.mptv.org
+ (r'video\.wnin\.org', 'WNIN (WNIN)'), # http://www.wnin.org/
+ (r'video\.wnit\.org', 'WNIT Public Television (WNIT)'), # http://www.wnit.org/
+ (r'video\.wpt\.org', 'WPT (WPNE)'), # http://www.wpt.org/
+ (r'video\.wvut\.org', 'WVUT/Channel 22 (WVUT)'), # http://wvut.org/
+ (r'video\.weiu\.net', 'WEIU/Channel 51 (WEIU)'), # http://www.weiu.net
+ (r'video\.wqpt\.org', 'WQPT-TV (WQPT)'), # http://www.wqpt.org
+ (r'video\.wycc\.org', 'WYCC PBS Chicago (WYCC)'), # http://www.wycc.org
+ # (r'lakeshorepublicmedia\.org', 'Lakeshore Public Television (WYIN)'), # http://lakeshorepublicmedia.org/
+ (r'video\.wipb\.org', 'WIPB-TV (WIPB)'), # http://wipb.org
+ (r'video\.indianapublicmedia\.org', 'WTIU (WTIU)'), # http://indianapublicmedia.org/tv/
+ (r'watch\.cetconnect\.org', 'CET (WCET)'), # http://www.cetconnect.org
+ (r'video\.thinktv\.org', 'ThinkTVNetwork (WPTD)'), # http://www.thinktv.org
+ (r'video\.wbgu\.org', 'WBGU-TV (WBGU)'), # http://wbgu.org
+ (r'video\.wgvu\.org', 'WGVU TV (WGVU)'), # http://www.wgvu.org/
+ (r'video\.netnebraska\.org', 'NET1 (KUON)'), # http://netnebraska.org
+ (r'video\.pioneer\.org', 'Pioneer Public Television (KWCM)'), # http://www.pioneer.org
+ (r'watch\.sdpb\.org', 'SDPB Television (KUSD)'), # http://www.sdpb.org
+ (r'video\.tpt\.org', 'TPT (KTCA)'), # http://www.tpt.org
+ (r'watch\.ksmq\.org', 'KSMQ (KSMQ)'), # http://www.ksmq.org/
+ (r'watch\.kpts\.org', 'KPTS/Channel 8 (KPTS)'), # http://www.kpts.org/
+ (r'watch\.ktwu\.org', 'KTWU/Channel 11 (KTWU)'), # http://ktwu.org
+ # (r'shptv\.org', 'Smoky Hills Public Television (KOOD)'), # http://www.shptv.org
+ # (r'kcpt\.org', 'KCPT Kansas City Public Television (KCPT)'), # http://kcpt.org/
+ # (r'blueridgepbs\.org', 'Blue Ridge PBS (WBRA)'), # http://www.blueridgepbs.org/
+ (r'watch\.easttennesseepbs\.org', 'East Tennessee PBS (WSJK)'), # http://easttennesseepbs.org
+ (r'video\.wcte\.tv', 'WCTE-TV (WCTE)'), # http://www.wcte.org
+ (r'video\.wljt\.org', 'WLJT, Channel 11 (WLJT)'), # http://wljt.org/
+ (r'video\.wosu\.org', 'WOSU TV (WOSU)'), # http://wosu.org/
+ (r'video\.woub\.org', 'WOUB/WOUC (WOUB)'), # http://woub.org/tv/index.php?section=5
+ (r'video\.wvpublic\.org', 'WVPB (WVPB)'), # http://wvpublic.org/
+ (r'video\.wkyupbs\.org', 'WKYU-PBS (WKYU)'), # http://www.wkyupbs.org
+ # (r'wyes\.org', 'WYES-TV/New Orleans (WYES)'), # http://www.wyes.org
+ (r'video\.kera\.org', 'KERA 13 (KERA)'), # http://www.kera.org/
+ (r'video\.mpbn\.net', 'MPBN (WCBB)'), # http://www.mpbn.net/
+ (r'video\.mountainlake\.org', 'Mountain Lake PBS (WCFE)'), # http://www.mountainlake.org/
+ (r'video\.nhptv\.org', 'NHPTV (WENH)'), # http://nhptv.org/
+ (r'video\.vpt\.org', 'Vermont PBS (WETK)'), # http://www.vpt.org
+ (r'video\.witf\.org', 'witf (WITF)'), # http://www.witf.org
+ (r'watch\.wqed\.org', 'WQED Multimedia (WQED)'), # http://www.wqed.org/
+ (r'video\.wmht\.org', 'WMHT Educational Telecommunications (WMHT)'), # http://www.wmht.org/home/
+ (r'video\.deltabroadcasting\.org', 'Q-TV (WDCQ)'), # http://www.deltabroadcasting.org
+ (r'video\.dptv\.org', 'WTVS Detroit Public TV (WTVS)'), # http://www.dptv.org/
+ (r'video\.wcmu\.org', 'CMU Public Television (WCMU)'), # http://www.wcmu.org
+ (r'video\.wkar\.org', 'WKAR-TV (WKAR)'), # http://wkar.org/
+ (r'wnmuvideo\.nmu\.edu', 'WNMU-TV Public TV 13 (WNMU)'), # http://wnmutv.nmu.edu
+ (r'video\.wdse\.org', 'WDSE - WRPT (WDSE)'), # http://www.wdse.org/
+ (r'video\.wgte\.org', 'WGTE TV (WGTE)'), # http://www.wgte.org
+ (r'video\.lptv\.org', 'Lakeland Public Television (KAWE)'), # http://www.lakelandptv.org
+ # (r'prairiepublic\.org', 'PRAIRIE PUBLIC (KFME)'), # http://www.prairiepublic.org/
+ (r'video\.kmos\.org', 'KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS)'), # http://www.kmos.org/
+ (r'watch\.montanapbs\.org', 'MontanaPBS (KUSM)'), # http://montanapbs.org
+ (r'video\.krwg\.org', 'KRWG/Channel 22 (KRWG)'), # http://www.krwg.org
+ (r'video\.kacvtv\.org', 'KACV (KACV)'), # http://www.panhandlepbs.org/home/
+ (r'video\.kcostv\.org', 'KCOS/Channel 13 (KCOS)'), # www.kcostv.org
+ (r'video\.wcny\.org', 'WCNY/Channel 24 (WCNY)'), # http://www.wcny.org
+ (r'video\.wned\.org', 'WNED (WNED)'), # http://www.wned.org/
+ (r'watch\.wpbstv\.org', 'WPBS (WPBS)'), # http://www.wpbstv.org
+ (r'video\.wskg\.org', 'WSKG Public TV (WSKG)'), # http://wskg.org
+ (r'video\.wxxi\.org', 'WXXI (WXXI)'), # http://wxxi.org
+ (r'video\.wpsu\.org', 'WPSU (WPSU)'), # http://www.wpsu.org
+ # (r'wqln\.org', 'WQLN/Channel 54 (WQLN)'), # http://www.wqln.org
+ (r'on-demand\.wvia\.org', 'WVIA Public Media Studios (WVIA)'), # http://www.wvia.org/
+ (r'video\.wtvi\.org', 'WTVI (WTVI)'), # http://www.wtvi.org/
+ # (r'whro\.org', 'WHRO (WHRO)'), # http://whro.org
+ (r'video\.westernreservepublicmedia\.org', 'Western Reserve PBS (WNEO)'), # http://www.WesternReservePublicMedia.org/
+ (r'video\.ideastream\.org', 'WVIZ/PBS ideastream (WVIZ)'), # http://www.wviz.org/
+ (r'video\.kcts9\.org', 'KCTS 9 (KCTS)'), # http://kcts9.org/
+ (r'video\.basinpbs\.org', 'Basin PBS (KPBT)'), # http://www.basinpbs.org
+ (r'video\.houstonpbs\.org', 'KUHT / Channel 8 (KUHT)'), # http://www.houstonpublicmedia.org/
+ # (r'tamu\.edu', 'KAMU - TV (KAMU)'), # http://KAMU.tamu.edu
+ # (r'kedt\.org', 'KEDT/Channel 16 (KEDT)'), # http://www.kedt.org
+ (r'video\.klrn\.org', 'KLRN (KLRN)'), # http://www.klrn.org
+ (r'video\.klru\.tv', 'KLRU (KLRU)'), # http://www.klru.org
+ # (r'kmbh\.org', 'KMBH-TV (KMBH)'), # http://www.kmbh.org
+ # (r'knct\.org', 'KNCT (KNCT)'), # http://www.knct.org
+ # (r'ktxt\.org', 'KTTZ-TV (KTXT)'), # http://www.ktxt.org
+ (r'video\.wtjx\.org', 'WTJX Channel 12 (WTJX)'), # http://www.wtjx.org/
+ (r'video\.ideastations\.org', 'WCVE PBS (WCVE)'), # http://ideastations.org/
+ (r'video\.kbtc\.org', 'KBTC Public Television (KBTC)'), # http://kbtc.org
+ )
+
+ IE_NAME = 'pbs'
+ IE_DESC = 'Public Broadcasting Service (PBS) and member stations: %s' % ', '.join(list(zip(*_STATIONS))[1])
+
+ _VALID_URL = r'''(?x)https?://
+ (?:
+ # Direct video URL
+ (?:%s)/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) |
+ # Article with embedded player (or direct video)
+ (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
+ # Player
+ (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
+ )
+ ''' % '|'.join(list(zip(*_STATIONS))[0])
+
+ _GEO_COUNTRIES = ['US']
+
+ _TESTS = [
+ {
+ 'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
+ 'md5': '173dc391afd361fa72eab5d3d918968d',
+ 'info_dict': {
+ 'id': '2365006249',
+ 'ext': 'mp4',
+ 'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
+ 'description': 'md5:31b664af3c65fd07fa460d306b837d00',
+ 'duration': 3190,
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
+ 'md5': '6f722cb3c3982186d34b0f13374499c7',
+ 'info_dict': {
+ 'id': '2365297690',
+ 'ext': 'mp4',
+ 'title': 'FRONTLINE - Losing Iraq',
+ 'description': 'md5:5979a4d069b157f622d02bff62fbe654',
+ 'duration': 5050,
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
+ 'md5': 'b19856d7f5351b17a5ab1dc6a64be633',
+ 'info_dict': {
+ 'id': '2201174722',
+ 'ext': 'mp4',
+ 'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
+ 'description': 'md5:86ab9a3d04458b876147b355788b8781',
+ 'duration': 801,
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/wnet/gperf/dudamel-conducts-verdi-requiem-hollywood-bowl-full-episode/3374/',
+ 'md5': 'c62859342be2a0358d6c9eb306595978',
+ 'info_dict': {
+ 'id': '2365297708',
+ 'ext': 'mp4',
+ 'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
+ 'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b',
+ 'duration': 6559,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html',
+ 'md5': '908f3e5473a693b266b84e25e1cf9703',
+ 'info_dict': {
+ 'id': '2365160389',
+ 'display_id': 'killer-typhoon',
+ 'ext': 'mp4',
+ 'description': 'md5:c741d14e979fc53228c575894094f157',
+ 'title': 'NOVA - Killer Typhoon',
+ 'duration': 3172,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20140122',
+ 'age_limit': 10,
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
+ 'info_dict': {
+ 'id': 'united-states-of-secrets',
+ },
+ 'playlist_count': 2,
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/americanexperience/films/great-war/',
+ 'info_dict': {
+ 'id': 'great-war',
+ },
+ 'playlist_count': 3,
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',
+ 'info_dict': {
+ 'id': '2276541483',
+ 'display_id': 'player',
+ 'ext': 'mp4',
+ 'title': 'American Experience - Death and the Civil War, Chapter 1',
+ 'description': 'md5:67fa89a9402e2ee7d08f53b920674c18',
+ 'duration': 682,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/video/2365245528/',
+ 'md5': '115223d41bd55cda8ae5cd5ed4e11497',
+ 'info_dict': {
+ 'id': '2365245528',
+ 'display_id': '2365245528',
+ 'ext': 'mp4',
+ 'title': 'FRONTLINE - United States of Secrets (Part One)',
+ 'description': 'md5:55756bd5c551519cc4b7703e373e217e',
+ 'duration': 6851,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+            # Video embedded in an iframe whose attribute values contain angle brackets (e.g.
+ # "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
+ # https://github.com/ytdl-org/youtube-dl/issues/7059)
+ 'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
+ 'md5': '59b0ef5009f9ac8a319cc5efebcd865e',
+ 'info_dict': {
+ 'id': '2365546844',
+ 'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
+ 'ext': 'mp4',
+ 'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
+ 'description': 'md5:c0ff7475a4b70261c7e58f493c2792a5',
+ 'duration': 1480,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ # Frontline video embedded via flp2012.js
+ 'url': 'http://www.pbs.org/wgbh/pages/frontline/the-atomic-artists',
+ 'info_dict': {
+ 'id': '2070868960',
+ 'display_id': 'the-atomic-artists',
+ 'ext': 'mp4',
+ 'title': 'FRONTLINE - The Atomic Artists',
+ 'description': 'md5:f677e4520cfacb4a5ce1471e31b57800',
+ 'duration': 723,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
+ },
+ {
+            # Serves HD only via the widget/partnerplayer page
+ 'url': 'http://www.pbs.org/video/2365641075/',
+ 'md5': 'fdf907851eab57211dd589cf12006666',
+ 'info_dict': {
+ 'id': '2365641075',
+ 'ext': 'mp4',
+ 'title': 'FRONTLINE - Netanyahu at War',
+ 'duration': 6852,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'formats': 'mincount:8',
+ },
+ },
+ {
+ # https://github.com/ytdl-org/youtube-dl/issues/13801
+ 'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/',
+ 'info_dict': {
+ 'id': '3003333873',
+ 'ext': 'mp4',
+ 'title': 'PBS NewsHour - full episode July 31, 2017',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'duration': 3265,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/',
+ 'info_dict': {
+ 'id': '2365936247',
+ 'ext': 'mp4',
+ 'title': 'Antiques Roadshow - Indianapolis, Hour 2',
+ 'description': 'md5:524b32249db55663e7231b6b8d1671a2',
+ 'duration': 3180,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['HTTP Error 403: Forbidden'],
+ },
+ {
+ 'url': 'https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/',
+ 'info_dict': {
+ 'id': '3007193718',
+ 'ext': 'mp4',
+ 'title': "Victoria - A Soldier's Daughter / The Green-Eyed Monster",
+ 'description': 'md5:37efbac85e0c09b009586523ec143652',
+ 'duration': 6292,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['HTTP Error 403: Forbidden'],
+ },
+ {
+ 'url': 'https://player.pbs.org/partnerplayer/tOz9tM5ljOXQqIIWke53UA==/',
+ 'info_dict': {
+ 'id': '3011407934',
+ 'ext': 'mp4',
+ 'title': 'Stories from the Stage - Road Trip',
+ 'duration': 1619,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['HTTP Error 403: Forbidden'],
+ },
+ {
+ 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://watch.knpb.org/video/2365616055/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=',
+ 'only_matching': True,
+ }
+ ]
+ _ERRORS = {
+ 101: 'We\'re sorry, but this video is not yet available.',
+        403: 'We\'re sorry, but this video is not available in your region due to rights restrictions.',
+ 404: 'We are experiencing technical difficulties that are preventing us from playing the video at this time. Please check back again soon.',
+ 410: 'This video has expired and is no longer available for online streaming.',
+ }
+
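+    # The localization endpoint returns a cookie string that embeds the
+    # viewer's member-station code; pin it via the pbsol.station cookie.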
+ def _real_initialize(self):
+ cookie = (self._download_json(
+ 'http://localization.services.pbs.org/localize/auto/cookie/',
+ None, headers=self.geo_verification_headers(), fatal=False) or {}).get('cookie')
+ if cookie:
+ station = self._search_regex(r'#?s=\["([^"]+)"', cookie, 'station')
+ if station:
+ self._set_cookie('.pbs.org', 'pbsol.station', station)
+
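+    # Returns (video id or list of ids, display id, upload date, description);
+    # a list of ids signals a multi-part (tabbed) page.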
+ def _extract_webpage(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ description = None
+
+ presumptive_id = mobj.group('presumptive_id')
+ display_id = presumptive_id
+ if presumptive_id:
+ webpage = self._download_webpage(url, display_id)
+
+ description = strip_or_none(self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'description', webpage, default=None))
+ upload_date = unified_strdate(self._search_regex(
+ r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"',
+ webpage, 'upload date', default=None))
+
+ # tabbed frontline videos
+ MULTI_PART_REGEXES = (
+ r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
+ r'<a[^>]+href=["\']#(?:video-|part)\d+["\'][^>]+data-cove[Ii]d=["\'](\d+)',
+ )
+ for p in MULTI_PART_REGEXES:
+ tabbed_videos = orderedSet(re.findall(p, webpage))
+ if tabbed_videos:
+ return tabbed_videos, presumptive_id, upload_date, description
+
+ MEDIA_ID_REGEXES = [
+ r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
+ r'class="coveplayerid">([^<]+)<', # coveplayer
+ r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
+ r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
+ r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
+ r'<div[^>]+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
+ r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)', # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/
+ ]
+
+ media_id = self._search_regex(
+ MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
+ if media_id:
+ return media_id, presumptive_id, upload_date, description
+
+ # Frontline video embedded via flp
+ video_id = self._search_regex(
+ r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
+ if video_id:
+ # pkg_id calculation is reverse engineered from
+ # http://www.pbs.org/wgbh/pages/frontline/js/flp2012.js
+ prg_id = self._search_regex(
+ r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid')[7:]
+ if 'q' in prg_id:
+ prg_id = prg_id.split('q')[1]
+ prg_id = int(prg_id, 16)
+ getdir = self._download_json(
+ 'http://www.pbs.org/wgbh/pages/frontline/.json/getdir/getdir%d.json' % prg_id,
+ presumptive_id, 'Downloading getdir JSON',
+ transform_source=strip_jsonp)
+ return getdir['mid'], presumptive_id, upload_date, description
+
+ for iframe in re.findall(r'(?s)<iframe(.+?)></iframe>', webpage):
+ url = self._search_regex(
+ r'src=(["\'])(?P<url>.+?partnerplayer.+?)\1', iframe,
+ 'player URL', default=None, group='url')
+ if url:
+ break
+
+ if not url:
+ url = self._og_search_url(webpage)
+
+ mobj = re.match(
+ self._VALID_URL, self._proto_relative_url(url.strip()))
+
+ player_id = mobj.group('player_id')
+ if not display_id:
+ display_id = player_id
+ if player_id:
+ player_page = self._download_webpage(
+ url, display_id, note='Downloading player page',
+ errnote='Could not download player page')
+ video_id = self._search_regex(
+ r'<div\s+id=["\']video_(\d+)', player_page, 'video ID',
+ default=None)
+ if not video_id:
+ video_info = self._extract_video_data(
+ player_page, 'video data', display_id)
+ video_id = compat_str(
+ video_info.get('id') or video_info['contentID'])
+ else:
+ video_id = mobj.group('id')
+ display_id = video_id
+
+ return video_id, display_id, None, description
+
+ def _extract_video_data(self, string, name, video_id, fatal=True):
+ return self._parse_json(
+ self._search_regex(
+ [r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
+ r'window\.videoBridge\s*=\s*({.+?});'],
+ string, name, default='{}'),
+ video_id, transform_source=js_to_json, fatal=fatal)
+
+ def _real_extract(self, url):
+ video_id, display_id, upload_date, description = self._extract_webpage(url)
+
+ if isinstance(video_id, list):
+ entries = [self.url_result(
+ 'http://video.pbs.org/video/%s' % vid_id, 'PBS', vid_id)
+ for vid_id in video_id]
+ return self.playlist_result(entries, display_id)
+
+ info = None
+ redirects = []
+ redirect_urls = set()
+
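+        # Collect candidate stream URLs, deduplicating across the recommended
+        # and alternate encodings plus the optional 'encodings' list.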
+ def extract_redirect_urls(info):
+ for encoding_name in ('recommended_encoding', 'alternate_encoding'):
+ redirect = info.get(encoding_name)
+ if not redirect:
+ continue
+ redirect_url = redirect.get('url')
+ if redirect_url and redirect_url not in redirect_urls:
+ redirects.append(redirect)
+ redirect_urls.add(redirect_url)
+ encodings = info.get('encodings')
+ if isinstance(encodings, list):
+ for encoding in encodings:
+ encoding_url = url_or_none(encoding)
+ if encoding_url and encoding_url not in redirect_urls:
+ redirects.append({'url': encoding_url})
+ redirect_urls.add(encoding_url)
+
+ chapters = []
+ # Player pages may also serve different qualities
+ for page in ('widget/partnerplayer', 'portalplayer'):
+ player = self._download_webpage(
+ 'http://player.pbs.org/%s/%s' % (page, video_id),
+ display_id, 'Downloading %s page' % page, fatal=False)
+ if player:
+ video_info = self._extract_video_data(
+ player, '%s video data' % page, display_id, fatal=False)
+ if video_info:
+ extract_redirect_urls(video_info)
+ if not info:
+ info = video_info
+ if not chapters:
+ raw_chapters = video_info.get('chapters') or []
+ if not raw_chapters:
+ for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player):
+ chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False)
+ if not chapter:
+ continue
+ raw_chapters.append(chapter)
+ for chapter in raw_chapters:
+ start_time = float_or_none(chapter.get('start_time'), 1000)
+ duration = float_or_none(chapter.get('duration'), 1000)
+ if start_time is None or duration is None:
+ continue
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': start_time + duration,
+ 'title': chapter.get('title'),
+ })
+
+ formats = []
+ http_url = None
+ for num, redirect in enumerate(redirects):
+ redirect_id = redirect.get('eeid')
+
+ redirect_info = self._download_json(
+ '%s?format=json' % redirect['url'], display_id,
+ 'Downloading %s video url info' % (redirect_id or num),
+ headers=self.geo_verification_headers())
+
+ if redirect_info['status'] == 'error':
+ message = self._ERRORS.get(
+ redirect_info['http_code'], redirect_info['message'])
+ if redirect_info['http_code'] == 403:
+ self.raise_geo_restricted(
+ msg=message, countries=self._GEO_COUNTRIES)
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, message), expected=True)
+
+ format_url = redirect_info.get('url')
+ if not format_url:
+ continue
+
+ if determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': redirect_id,
+ })
+ if re.search(r'^https?://.*(?:\d+k|baseline)', format_url):
+ http_url = format_url
+ self._remove_duplicate_formats(formats)
+ m3u8_formats = list(filter(
+ lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
+ formats))
+ if http_url:
+ for m3u8_format in m3u8_formats:
+ bitrate = self._search_regex(r'(\d+)k', m3u8_format['url'], 'bitrate', default=None)
+ # Lower qualities (150k and 192k) are not available as HTTP formats (see [1]),
+ # we won't try extracting them.
+ # Since summer 2016 higher quality formats (4500k and 6500k) are also available
+ # albeit they are not documented in [2].
+ # 1. https://github.com/ytdl-org/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656
+ # 2. https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
+ if not bitrate or int(bitrate) < 400:
+ continue
+ f_url = re.sub(r'\d+k|baseline', bitrate + 'k', http_url)
+ # This may produce invalid links sometimes (e.g.
+ # http://www.pbs.org/wgbh/frontline/film/suicide-plan)
+ if not self._is_valid_url(f_url, display_id, 'http-%sk video' % bitrate):
+ continue
+ f = m3u8_format.copy()
+ f.update({
+ 'url': f_url,
+ 'format_id': m3u8_format['format_id'].replace('hls', 'http'),
+ 'protocol': 'http',
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ rating_str = info.get('rating')
+ if rating_str is not None:
+ rating_str = rating_str.rpartition('-')[2]
+ age_limit = US_RATINGS.get(rating_str)
+
+ subtitles = {}
+ closed_captions_url = info.get('closed_captions_url')
+ if closed_captions_url:
+ subtitles['en'] = [{
+ 'ext': 'ttml',
+ 'url': closed_captions_url,
+ }]
+ mobj = re.search(r'/(\d+)_Encoded\.dfxp', closed_captions_url)
+ if mobj:
+ ttml_caption_suffix, ttml_caption_id = mobj.group(0, 1)
+ ttml_caption_id = int(ttml_caption_id)
+ subtitles['en'].extend([{
+ 'url': closed_captions_url.replace(
+ ttml_caption_suffix, '/%d_Encoded.srt' % (ttml_caption_id + 1)),
+ 'ext': 'srt',
+ }, {
+ 'url': closed_captions_url.replace(
+ ttml_caption_suffix, '/%d_Encoded.vtt' % (ttml_caption_id + 2)),
+ 'ext': 'vtt',
+ }])
+
+ # info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc)
+ # Try turning it to 'program - title' naming scheme if possible
+ alt_title = info.get('program', {}).get('title')
+ if alt_title:
+ info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + r'[\s\-:]+', '', info['title'])
+
+ description = info.get('description') or info.get(
+ 'program', {}).get('description') or description
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': info['title'],
+ 'description': description,
+ 'thumbnail': info.get('image_url'),
+ 'duration': int_or_none(info.get('duration')),
+ 'age_limit': age_limit,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'chapters': chapters,
+ }
diff --git a/hypervideo_dl/extractor/pearvideo.py b/hypervideo_dl/extractor/pearvideo.py
new file mode 100644
index 0000000..1d77722
--- /dev/null
+++ b/hypervideo_dl/extractor/pearvideo.py
@@ -0,0 +1,65 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ qualities,
+ unified_timestamp,
+)
+
+
+class PearVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.pearvideo.com/video_1076290',
+ 'info_dict': {
+ 'id': '1076290',
+ 'ext': 'mp4',
+ 'title': '小浣熊在主人家玻璃上滚石头:没砸',
+ 'description': 'md5:01d576b747de71be0ee85eb7cac25f9d',
+ 'timestamp': 1494275280,
+ 'upload_date': '20170508',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ quality = qualities(
+ ('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src'))
+
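+        # The page defines per-quality JS variables such as hdUrl or sdUrl;
+        # the prefix doubles as the format id.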
+ formats = [{
+ 'url': mobj.group('url'),
+ 'format_id': mobj.group('id'),
+ 'quality': quality(mobj.group('id')),
+ } for mobj in re.finditer(
+ r'(?P<id>[a-zA-Z]+)Url\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\2',
+ webpage)]
+ self._sort_formats(formats)
+
+ title = self._search_regex(
+ (r'<h1[^>]+\bclass=(["\'])video-tt\1[^>]*>(?P<value>[^<]+)',
+ r'<[^>]+\bdata-title=(["\'])(?P<value>(?:(?!\1).)+)\1'),
+ webpage, 'title', group='value')
+ description = self._search_regex(
+ (r'<div[^>]+\bclass=(["\'])summary\1[^>]*>(?P<value>[^<]+)',
+ r'<[^>]+\bdata-summary=(["\'])(?P<value>(?:(?!\1).)+)\1'),
+ webpage, 'description', default=None,
+ group='value') or self._html_search_meta('Description', webpage)
+ timestamp = unified_timestamp(self._search_regex(
+ r'<div[^>]+\bclass=["\']date["\'][^>]*>([^<]+)',
+ webpage, 'timestamp', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/peertube.py b/hypervideo_dl/extractor/peertube.py
new file mode 100644
index 0000000..d9b13ad
--- /dev/null
+++ b/hypervideo_dl/extractor/peertube.py
@@ -0,0 +1,631 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ parse_resolution,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+ urljoin,
+)
+
+
+class PeerTubeIE(InfoExtractor):
+ _INSTANCES_RE = r'''(?:
+ # Taken from https://instances.joinpeertube.org/instances
+ peertube\.rainbowswingers\.net|
+ tube\.stanisic\.nl|
+ peer\.suiri\.us|
+ medias\.libox\.fr|
+ videomensoif\.ynh\.fr|
+ peertube\.travelpandas\.eu|
+ peertube\.rachetjay\.fr|
+ peertube\.montecsys\.fr|
+ tube\.eskuero\.me|
+ peer\.tube|
+ peertube\.umeahackerspace\.se|
+ tube\.nx-pod\.de|
+ video\.monsieurbidouille\.fr|
+ tube\.openalgeria\.org|
+ vid\.lelux\.fi|
+ video\.anormallostpod\.ovh|
+ tube\.crapaud-fou\.org|
+ peertube\.stemy\.me|
+ lostpod\.space|
+ exode\.me|
+ peertube\.snargol\.com|
+ vis\.ion\.ovh|
+ videosdulib\.re|
+ v\.mbius\.io|
+ videos\.judrey\.eu|
+ peertube\.osureplayviewer\.xyz|
+ peertube\.mathieufamily\.ovh|
+ www\.videos-libr\.es|
+ fightforinfo\.com|
+ peertube\.fediverse\.ru|
+ peertube\.oiseauroch\.fr|
+ video\.nesven\.eu|
+ v\.bearvideo\.win|
+ video\.qoto\.org|
+ justporn\.cc|
+ video\.vny\.fr|
+ peervideo\.club|
+ tube\.taker\.fr|
+ peertube\.chantierlibre\.org|
+ tube\.ipfixe\.info|
+ tube\.kicou\.info|
+ tube\.dodsorf\.as|
+ videobit\.cc|
+ video\.yukari\.moe|
+ videos\.elbinario\.net|
+ hkvideo\.live|
+ pt\.tux\.tf|
+ www\.hkvideo\.live|
+ FIGHTFORINFO\.com|
+ pt\.765racing\.com|
+ peertube\.gnumeria\.eu\.org|
+ nordenmedia\.com|
+ peertube\.co\.uk|
+ tube\.darfweb\.eu|
+ tube\.kalah-france\.org|
+ 0ch\.in|
+ vod\.mochi\.academy|
+ film\.node9\.org|
+ peertube\.hatthieves\.es|
+ video\.fitchfamily\.org|
+ peertube\.ddns\.net|
+ video\.ifuncle\.kr|
+ video\.fdlibre\.eu|
+ tube\.22decembre\.eu|
+ peertube\.harmoniescreatives\.com|
+ tube\.fabrigli\.fr|
+ video\.thedwyers\.co|
+ video\.bruitbruit\.com|
+ peertube\.foxfam\.club|
+ peer\.philoxweb\.be|
+ videos\.bugs\.social|
+ peertube\.malbert\.xyz|
+ peertube\.bilange\.ca|
+ libretube\.net|
+ diytelevision\.com|
+ peertube\.fedilab\.app|
+ libre\.video|
+ video\.mstddntfdn\.online|
+ us\.tv|
+ peertube\.sl-network\.fr|
+ peertube\.dynlinux\.io|
+ peertube\.david\.durieux\.family|
+ peertube\.linuxrocks\.online|
+ peerwatch\.xyz|
+ v\.kretschmann\.social|
+ tube\.otter\.sh|
+ yt\.is\.nota\.live|
+ tube\.dragonpsi\.xyz|
+ peertube\.boneheadmedia\.com|
+ videos\.funkwhale\.audio|
+ watch\.44con\.com|
+ peertube\.gcaillaut\.fr|
+ peertube\.icu|
+ pony\.tube|
+ spacepub\.space|
+ tube\.stbr\.io|
+ v\.mom-gay\.faith|
+ tube\.port0\.xyz|
+ peertube\.simounet\.net|
+ play\.jergefelt\.se|
+ peertube\.zeteo\.me|
+ tube\.danq\.me|
+ peertube\.kerenon\.com|
+ tube\.fab-l3\.org|
+ tube\.calculate\.social|
+ peertube\.mckillop\.org|
+ tube\.netzspielplatz\.de|
+ vod\.ksite\.de|
+ peertube\.laas\.fr|
+ tube\.govital\.net|
+ peertube\.stephenson\.cc|
+ bistule\.nohost\.me|
+ peertube\.kajalinifi\.de|
+ video\.ploud\.jp|
+ video\.omniatv\.com|
+ peertube\.ffs2play\.fr|
+ peertube\.leboulaire\.ovh|
+ peertube\.tronic-studio\.com|
+ peertube\.public\.cat|
+ peertube\.metalbanana\.net|
+ video\.1000i100\.fr|
+ peertube\.alter-nativ-voll\.de|
+ tube\.pasa\.tf|
+ tube\.worldofhauru\.xyz|
+ pt\.kamp\.site|
+ peertube\.teleassist\.fr|
+ videos\.mleduc\.xyz|
+ conf\.tube|
+ media\.privacyinternational\.org|
+ pt\.forty-two\.nl|
+ video\.halle-leaks\.de|
+ video\.grosskopfgames\.de|
+ peertube\.schaeferit\.de|
+ peertube\.jackbot\.fr|
+ tube\.extinctionrebellion\.fr|
+ peertube\.f-si\.org|
+ video\.subak\.ovh|
+ videos\.koweb\.fr|
+ peertube\.zergy\.net|
+ peertube\.roflcopter\.fr|
+ peertube\.floss-marketing-school\.com|
+ vloggers\.social|
+ peertube\.iriseden\.eu|
+ videos\.ubuntu-paris\.org|
+ peertube\.mastodon\.host|
+ armstube\.com|
+ peertube\.s2s\.video|
+ peertube\.lol|
+ tube\.open-plug\.eu|
+ open\.tube|
+ peertube\.ch|
+ peertube\.normandie-libre\.fr|
+ peertube\.slat\.org|
+ video\.lacaveatonton\.ovh|
+ peertube\.uno|
+ peertube\.servebeer\.com|
+ peertube\.fedi\.quebec|
+ tube\.h3z\.jp|
+ tube\.plus200\.com|
+ peertube\.eric\.ovh|
+ tube\.metadocs\.cc|
+ tube\.unmondemeilleur\.eu|
+ gouttedeau\.space|
+ video\.antirep\.net|
+ nrop\.cant\.at|
+ tube\.ksl-bmx\.de|
+ tube\.plaf\.fr|
+ tube\.tchncs\.de|
+ video\.devinberg\.com|
+ hitchtube\.fr|
+ peertube\.kosebamse\.com|
+ yunopeertube\.myddns\.me|
+ peertube\.varney\.fr|
+ peertube\.anon-kenkai\.com|
+ tube\.maiti\.info|
+ tubee\.fr|
+ videos\.dinofly\.com|
+ toobnix\.org|
+ videotape\.me|
+ voca\.tube|
+ video\.heromuster\.com|
+ video\.lemediatv\.fr|
+ video\.up\.edu\.ph|
+ balafon\.video|
+ video\.ivel\.fr|
+ thickrips\.cloud|
+ pt\.laurentkruger\.fr|
+ video\.monarch-pass\.net|
+ peertube\.artica\.center|
+ video\.alternanet\.fr|
+ indymotion\.fr|
+ fanvid\.stopthatimp\.net|
+ video\.farci\.org|
+ v\.lesterpig\.com|
+ video\.okaris\.de|
+ tube\.pawelko\.net|
+ peertube\.mablr\.org|
+ tube\.fede\.re|
+ pytu\.be|
+ evertron\.tv|
+ devtube\.dev-wiki\.de|
+ raptube\.antipub\.org|
+ video\.selea\.se|
+ peertube\.mygaia\.org|
+ video\.oh14\.de|
+ peertube\.livingutopia\.org|
+ peertube\.the-penguin\.de|
+ tube\.thechangebook\.org|
+ tube\.anjara\.eu|
+ pt\.pube\.tk|
+ video\.samedi\.pm|
+ mplayer\.demouliere\.eu|
+ widemus\.de|
+ peertube\.me|
+ peertube\.zapashcanon\.fr|
+ video\.latavernedejohnjohn\.fr|
+ peertube\.pcservice46\.fr|
+ peertube\.mazzonetto\.eu|
+ video\.irem\.univ-paris-diderot\.fr|
+ video\.livecchi\.cloud|
+ alttube\.fr|
+ video\.coop\.tools|
+ video\.cabane-libre\.org|
+ peertube\.openstreetmap\.fr|
+ videos\.alolise\.org|
+ irrsinn\.video|
+ video\.antopie\.org|
+ scitech\.video|
+ tube2\.nemsia\.org|
+ video\.amic37\.fr|
+ peertube\.freeforge\.eu|
+ video\.arbitrarion\.com|
+ video\.datsemultimedia\.com|
+ stoptrackingus\.tv|
+ peertube\.ricostrongxxx\.com|
+ docker\.videos\.lecygnenoir\.info|
+ peertube\.togart\.de|
+ tube\.postblue\.info|
+ videos\.domainepublic\.net|
+ peertube\.cyber-tribal\.com|
+ video\.gresille\.org|
+ peertube\.dsmouse\.net|
+ cinema\.yunohost\.support|
+ tube\.theocevaer\.fr|
+ repro\.video|
+ tube\.4aem\.com|
+ quaziinc\.com|
+ peertube\.metawurst\.space|
+ videos\.wakapo\.com|
+ video\.ploud\.fr|
+ video\.freeradical\.zone|
+ tube\.valinor\.fr|
+ refuznik\.video|
+ pt\.kircheneuenburg\.de|
+ peertube\.asrun\.eu|
+ peertube\.lagob\.fr|
+ videos\.side-ways\.net|
+ 91video\.online|
+ video\.valme\.io|
+ video\.taboulisme\.com|
+ videos-libr\.es|
+ tv\.mooh\.fr|
+ nuage\.acostey\.fr|
+ video\.monsieur-a\.fr|
+ peertube\.librelois\.fr|
+ videos\.pair2jeux\.tube|
+ videos\.pueseso\.club|
+ peer\.mathdacloud\.ovh|
+ media\.assassinate-you\.net|
+ vidcommons\.org|
+ ptube\.rousset\.nom\.fr|
+ tube\.cyano\.at|
+ videos\.squat\.net|
+ video\.iphodase\.fr|
+ peertube\.makotoworkshop\.org|
+ peertube\.serveur\.slv-valbonne\.fr|
+ vault\.mle\.party|
+ hostyour\.tv|
+ videos\.hack2g2\.fr|
+ libre\.tube|
+ pire\.artisanlogiciel\.net|
+ videos\.numerique-en-commun\.fr|
+ video\.netsyms\.com|
+ video\.die-partei\.social|
+ video\.writeas\.org|
+ peertube\.swarm\.solvingmaz\.es|
+ tube\.pericoloso\.ovh|
+ watching\.cypherpunk\.observer|
+ videos\.adhocmusic\.com|
+ tube\.rfc1149\.net|
+ peertube\.librelabucm\.org|
+ videos\.numericoop\.fr|
+ peertube\.koehn\.com|
+ peertube\.anarchmusicall\.net|
+ tube\.kampftoast\.de|
+ vid\.y-y\.li|
+ peertube\.xtenz\.xyz|
+ diode\.zone|
+ tube\.egf\.mn|
+ peertube\.nomagic\.uk|
+ visionon\.tv|
+ videos\.koumoul\.com|
+ video\.rastapuls\.com|
+ video\.mantlepro\.com|
+ video\.deadsuperhero\.com|
+ peertube\.musicstudio\.pro|
+ peertube\.we-keys\.fr|
+ artitube\.artifaille\.fr|
+ peertube\.ethernia\.net|
+ tube\.midov\.pl|
+ peertube\.fr|
+ watch\.snoot\.tube|
+ peertube\.donnadieu\.fr|
+ argos\.aquilenet\.fr|
+ tube\.nemsia\.org|
+ tube\.bruniau\.net|
+ videos\.darckoune\.moe|
+ tube\.traydent\.info|
+ dev\.videos\.lecygnenoir\.info|
+ peertube\.nayya\.org|
+ peertube\.live|
+ peertube\.mofgao\.space|
+ video\.lequerrec\.eu|
+ peertube\.amicale\.net|
+ aperi\.tube|
+ tube\.ac-lyon\.fr|
+ video\.lw1\.at|
+ www\.yiny\.org|
+ videos\.pofilo\.fr|
+ tube\.lou\.lt|
+ choob\.h\.etbus\.ch|
+ tube\.hoga\.fr|
+ peertube\.heberge\.fr|
+ video\.obermui\.de|
+ videos\.cloudfrancois\.fr|
+ betamax\.video|
+ video\.typica\.us|
+ tube\.piweb\.be|
+ video\.blender\.org|
+ peertube\.cat|
+ tube\.kdy\.ch|
+ pe\.ertu\.be|
+ peertube\.social|
+ videos\.lescommuns\.org|
+ tv\.datamol\.org|
+ videonaute\.fr|
+ dialup\.express|
+ peertube\.nogafa\.org|
+ megatube\.lilomoino\.fr|
+ peertube\.tamanoir\.foucry\.net|
+ peertube\.devosi\.org|
+ peertube\.1312\.media|
+ tube\.bootlicker\.party|
+ skeptikon\.fr|
+ video\.blueline\.mg|
+ tube\.homecomputing\.fr|
+ tube\.ouahpiti\.info|
+ video\.tedomum\.net|
+ video\.g3l\.org|
+ fontube\.fr|
+ peertube\.gaialabs\.ch|
+ tube\.kher\.nl|
+ peertube\.qtg\.fr|
+ video\.migennes\.net|
+ tube\.p2p\.legal|
+ troll\.tv|
+ videos\.iut-orsay\.fr|
+ peertube\.solidev\.net|
+ videos\.cemea\.org|
+ video\.passageenseine\.fr|
+ videos\.festivalparminous\.org|
+ peertube\.touhoppai\.moe|
+ sikke\.fi|
+ peer\.hostux\.social|
+ share\.tube|
+ peertube\.walkingmountains\.fr|
+ videos\.benpro\.fr|
+ peertube\.parleur\.net|
+ peertube\.heraut\.eu|
+ tube\.aquilenet\.fr|
+ peertube\.gegeweb\.eu|
+ framatube\.org|
+ thinkerview\.video|
+ tube\.conferences-gesticulees\.net|
+ peertube\.datagueule\.tv|
+ video\.lqdn\.fr|
+ tube\.mochi\.academy|
+ media\.zat\.im|
+ video\.colibris-outilslibres\.org|
+ tube\.svnet\.fr|
+ peertube\.video|
+ peertube3\.cpy\.re|
+ peertube2\.cpy\.re|
+ videos\.tcit\.fr|
+ peertube\.cpy\.re|
+ canard\.tube
+ )'''
+ _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
+ _API_BASE = 'https://%s/api/v1/videos/%s/%s'
+ _VALID_URL = r'''(?x)
+ (?:
+ peertube:(?P<host>[^:]+):|
+ https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
+ )
+ (?P<id>%s)
+ ''' % (_INSTANCES_RE, _UUID_RE)
+ _TESTS = [{
+ 'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
+ 'md5': '9bed8c0137913e17b86334e5885aacff',
+ 'info_dict': {
+ 'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
+ 'ext': 'mp4',
+ 'title': 'What is PeerTube?',
+ 'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
+ 'thumbnail': r're:https?://.*\.(?:jpg|png)',
+ 'timestamp': 1538391166,
+ 'upload_date': '20181001',
+ 'uploader': 'Framasoft',
+ 'uploader_id': '3',
+ 'uploader_url': 'https://framatube.org/accounts/framasoft',
+ 'channel': 'Les vidéos de Framasoft',
+ 'channel_id': '2',
+ 'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8',
+ 'language': 'en',
+ 'license': 'Attribution - Share Alike',
+ 'duration': 113,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'tags': ['framasoft', 'peertube'],
+ 'categories': ['Science & Technology'],
+ }
+ }, {
+ # Issue #26002
+ 'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc',
+ 'info_dict': {
+ 'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc',
+ 'ext': 'mp4',
+ 'title': 'Dot matrix printer shell demo',
+ 'uploader_id': '3',
+ 'timestamp': 1587401293,
+ 'upload_date': '20200420',
+ 'uploader': 'Drew DeVault',
+ }
+ }, {
+ 'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
+ 'only_matching': True,
+ }, {
+ # nsfw
+ 'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
+ 'only_matching': True,
+ }, {
+ 'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_peertube_url(webpage, source_url):
+ mobj = re.match(
+ r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)'
+ % PeerTubeIE._UUID_RE, source_url)
+ if mobj and any(p in webpage for p in (
+ '<title>PeerTube<',
+ 'There will be other non JS-based clients to access PeerTube',
+ '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
+ return 'peertube:%s:%s' % mobj.group('host', 'id')
+
+ @staticmethod
+ def _extract_urls(webpage, source_url):
+ entries = re.findall(
+ r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
+ % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
+ if not entries:
+ peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
+ if peertube_url:
+ entries = [peertube_url]
+ return entries
+
+ def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
+ return self._download_json(
+ self._API_BASE % (host, video_id, path), video_id,
+ note=note, errnote=errnote, fatal=fatal)
+
+ def _get_subtitles(self, host, video_id):
+ captions = self._call_api(
+ host, video_id, 'captions', note='Downloading captions JSON',
+ fatal=False)
+ if not isinstance(captions, dict):
+ return
+ data = captions.get('data')
+ if not isinstance(data, list):
+ return
+ subtitles = {}
+ for e in data:
+ language_id = try_get(e, lambda x: x['language']['id'], compat_str)
+ caption_url = urljoin('https://%s' % host, e.get('captionPath'))
+ if not caption_url:
+ continue
+ subtitles.setdefault(language_id or 'en', []).append({
+ 'url': caption_url,
+ })
+ return subtitles
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host') or mobj.group('host_2')
+ video_id = mobj.group('id')
+
+ video = self._call_api(
+ host, video_id, '', note='Downloading video JSON')
+
+ title = video['name']
+
+ formats = []
+ files = video.get('files') or []
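+        # HLS streaming playlists carry their own file lists; merge them with
+        # the top-level files so every rendition is considered.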
+ for playlist in (video.get('streamingPlaylists') or []):
+ if not isinstance(playlist, dict):
+ continue
+ playlist_files = playlist.get('files')
+ if not (playlist_files and isinstance(playlist_files, list)):
+ continue
+ files.extend(playlist_files)
+ for file_ in files:
+ if not isinstance(file_, dict):
+ continue
+ file_url = url_or_none(file_.get('fileUrl'))
+ if not file_url:
+ continue
+ file_size = int_or_none(file_.get('size'))
+ format_id = try_get(
+ file_, lambda x: x['resolution']['label'], compat_str)
+ f = parse_resolution(format_id)
+ f.update({
+ 'url': file_url,
+ 'format_id': format_id,
+ 'filesize': file_size,
+ })
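+            # a '0p' resolution label denotes an audio-only file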
+ if format_id == '0p':
+ f['vcodec'] = 'none'
+ else:
+ f['fps'] = int_or_none(file_.get('fps'))
+ formats.append(f)
+ self._sort_formats(formats)
+
+ full_description = self._call_api(
+ host, video_id, 'description', note='Downloading description JSON',
+ fatal=False)
+
+ description = None
+ if isinstance(full_description, dict):
+ description = str_or_none(full_description.get('description'))
+ if not description:
+ description = video.get('description')
+
+ subtitles = self.extract_subtitles(host, video_id)
+
+ def data(section, field, type_):
+ return try_get(video, lambda x: x[section][field], type_)
+
+ def account_data(field, type_):
+ return data('account', field, type_)
+
+ def channel_data(field, type_):
+ return data('channel', field, type_)
+
+ category = data('category', 'label', compat_str)
+ categories = [category] if category else None
+
+ nsfw = video.get('nsfw')
+        if isinstance(nsfw, bool):
+ age_limit = 18 if nsfw else 0
+ else:
+ age_limit = None
+
+ webpage_url = 'https://%s/videos/watch/%s' % (host, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')),
+ 'timestamp': unified_timestamp(video.get('publishedAt')),
+ 'uploader': account_data('displayName', compat_str),
+ 'uploader_id': str_or_none(account_data('id', int)),
+ 'uploader_url': url_or_none(account_data('url', compat_str)),
+ 'channel': channel_data('displayName', compat_str),
+ 'channel_id': str_or_none(channel_data('id', int)),
+ 'channel_url': url_or_none(channel_data('url', compat_str)),
+ 'language': data('language', 'id', compat_str),
+ 'license': data('licence', 'label', compat_str),
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('views')),
+ 'like_count': int_or_none(video.get('likes')),
+ 'dislike_count': int_or_none(video.get('dislikes')),
+ 'age_limit': age_limit,
+ 'tags': try_get(video, lambda x: x['tags'], list),
+ 'categories': categories,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'webpage_url': webpage_url,
+ }
diff --git a/hypervideo_dl/extractor/people.py b/hypervideo_dl/extractor/people.py
new file mode 100644
index 0000000..6ca9571
--- /dev/null
+++ b/hypervideo_dl/extractor/people.py
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class PeopleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?people\.com/people/videos/0,,(?P<id>\d+),00\.html'
+
+ _TEST = {
+ 'url': 'http://www.people.com/people/videos/0,,20995451,00.html',
+ 'info_dict': {
+ 'id': 'ref:20995451',
+ 'ext': 'mp4',
+ 'title': 'Astronaut Love Triangle Victim Speaks Out: “The Crime in 2007 Hasn’t Defined Us”',
+ 'description': 'Colleen Shipman speaks to PEOPLE for the first time about life after the attack',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 246.318,
+ 'timestamp': 1458720585,
+ 'upload_date': '20160323',
+ 'uploader_id': '416418724',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }
+
+ def _real_extract(self, url):
+ return self.url_result(
+ 'http://players.brightcove.net/416418724/default_default/index.html?videoId=ref:%s'
+ % self._match_id(url), 'BrightcoveNew')
diff --git a/hypervideo_dl/extractor/performgroup.py b/hypervideo_dl/extractor/performgroup.py
new file mode 100644
index 0000000..26942bf
--- /dev/null
+++ b/hypervideo_dl/extractor/performgroup.py
@@ -0,0 +1,83 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class PerformGroupIE(InfoExtractor):
+ _VALID_URL = r'https?://player\.performgroup\.com/eplayer(?:/eplayer\.html|\.js)#/?(?P<id>[0-9a-f]{26})\.(?P<auth_token>[0-9a-z]{26})'
+ _TESTS = [{
+ # http://www.faz.net/aktuell/sport/fussball/wm-2018-playoffs-schweiz-besiegt-nordirland-1-0-15286104.html
+ 'url': 'http://player.performgroup.com/eplayer/eplayer.html#d478c41c5d192f56b9aa859de8.1w4crrej5w14e1ed4s1ce4ykab',
+ 'md5': '259cb03d142e2e52471e8837ecacb29f',
+ 'info_dict': {
+ 'id': 'xgrwobuzumes1lwjxtcdpwgxd',
+ 'ext': 'mp4',
+ 'title': 'Liga MX: Keine Einsicht nach Horrorfoul',
+ 'description': 'md5:7cd3b459c82725b021e046ab10bf1c5b',
+ 'timestamp': 1511533477,
+ 'upload_date': '20171124',
+ }
+ }]
+
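+    # The performfeeds endpoint is called with the embedding page as Referer
+    # and the player origin, which appear to be required by the API.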
+ def _call_api(self, service, auth_token, content_id, referer_url):
+ return self._download_json(
+ 'http://ep3.performfeeds.com/ep%s/%s/%s/' % (service, auth_token, content_id),
+ content_id, headers={
+ 'Referer': referer_url,
+ 'Origin': 'http://player.performgroup.com',
+ }, query={
+ '_fmt': 'json',
+ })
+
+ def _real_extract(self, url):
+ player_id, auth_token = re.search(self._VALID_URL, url).groups()
+ bootstrap = self._call_api('bootstrap', auth_token, player_id, url)
+ video = bootstrap['config']['dataSource']['sourceItems'][0]['videos'][0]
+ video_id = video['uuid']
+ vod = self._call_api('vod', auth_token, video_id, url)
+ media = vod['videos']['video'][0]['media']
+
+ formats = []
+ hls_url = media.get('hls', {}).get('url')
+ if hls_url:
+ formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ hds_url = media.get('hds', {}).get('url')
+ if hds_url:
+ formats.extend(self._extract_f4m_formats(hds_url + '?hdcore', video_id, f4m_id='hds', fatal=False))
+
+ for c in media.get('content', []):
+ c_url = c.get('url')
+ if not c_url:
+ continue
+ tbr = int_or_none(c.get('bitrate'), 1000)
+ format_id = 'http'
+ if tbr:
+ format_id += '-%d' % tbr
+ formats.append({
+ 'format_id': format_id,
+ 'url': c_url,
+ 'tbr': tbr,
+ 'width': int_or_none(c.get('width')),
+ 'height': int_or_none(c.get('height')),
+ 'filesize': int_or_none(c.get('fileSize')),
+ 'vcodec': c.get('type'),
+ 'fps': int_or_none(c.get('videoFrameRate')),
+ 'vbr': int_or_none(c.get('videoRate'), 1000),
+ 'abr': int_or_none(c.get('audioRate'), 1000),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video['title'],
+ 'description': video.get('description'),
+ 'thumbnail': video.get('poster'),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': int_or_none(video.get('publishedTime'), 1000),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/periscope.py b/hypervideo_dl/extractor/periscope.py
new file mode 100644
index 0000000..b159063
--- /dev/null
+++ b/hypervideo_dl/extractor/periscope.py
@@ -0,0 +1,189 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ unescapeHTML,
+)
+
+
+class PeriscopeBaseIE(InfoExtractor):
+ def _call_api(self, method, query, item_id):
+ return self._download_json(
+ 'https://api.periscope.tv/api/v2/%s' % method,
+ item_id, query=query)
+
+ def _parse_broadcast_data(self, broadcast, video_id):
+ title = broadcast.get('status') or 'Periscope Broadcast'
+ uploader = broadcast.get('user_display_name') or broadcast.get('username')
+ title = '%s - %s' % (uploader, title) if uploader else title
+        is_live = (broadcast.get('state') or '').lower() == 'running'
+
+ thumbnails = [{
+ 'url': broadcast[image],
+ } for image in ('image_url', 'image_url_small') if broadcast.get(image)]
+
+ return {
+ 'id': broadcast.get('id') or video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'timestamp': parse_iso8601(broadcast.get('created_at')),
+ 'uploader': uploader,
+ 'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
+ 'thumbnails': thumbnails,
+ 'view_count': int_or_none(broadcast.get('total_watched')),
+ 'tags': broadcast.get('tags'),
+ 'is_live': is_live,
+ }
+
+ @staticmethod
+ def _extract_common_format_info(broadcast):
+        return (broadcast.get('state') or '').lower(), int_or_none(broadcast.get('width')), int_or_none(broadcast.get('height'))
+
+ @staticmethod
+ def _add_width_and_height(f, width, height):
+ for key, val in (('width', width), ('height', height)):
+ if not f.get(key):
+ f[key] = val
+
+ def _extract_pscp_m3u8_formats(self, m3u8_url, video_id, format_id, state, width, height, fatal=True):
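+        # Replays of finished broadcasts ('ended'/'timed_out') are plain VOD
+        # playlists and use the native HLS downloader; running broadcasts go
+        # through the generic 'm3u8' (ffmpeg) protocol.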
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4',
+ entry_protocol='m3u8_native'
+ if state in ('ended', 'timed_out') else 'm3u8',
+ m3u8_id=format_id, fatal=fatal)
+ if len(m3u8_formats) == 1:
+ self._add_width_and_height(m3u8_formats[0], width, height)
+ return m3u8_formats
+
+
+class PeriscopeIE(PeriscopeBaseIE):
+ IE_DESC = 'Periscope'
+ IE_NAME = 'periscope'
+ _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
+    # Live example URLs can be found at https://www.periscope.tv/
+ _TESTS = [{
+ 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
+ 'md5': '65b57957972e503fcbbaeed8f4fa04ca',
+ 'info_dict': {
+ 'id': '56102209',
+ 'ext': 'mp4',
+ 'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗',
+ 'timestamp': 1438978559,
+ 'upload_date': '20150807',
+ 'uploader': 'Bec Boop',
+ 'uploader_id': '1465763',
+ },
+ 'skip': 'Expires in 24 hours',
+ }, {
+ 'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
+ 'only_matching': True,
+    }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ token = self._match_id(url)
+
+ stream = self._call_api(
+ 'accessVideoPublic', {'broadcast_id': token}, token)
+
+ broadcast = stream['broadcast']
+ info = self._parse_broadcast_data(broadcast, token)
+
+        state = (broadcast.get('state') or '').lower()
+ width = int_or_none(broadcast.get('width'))
+ height = int_or_none(broadcast.get('height'))
+
+ video_urls = set()
+ formats = []
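+        # The accessVideoPublic response exposes one URL per delivery method;
+        # several keys often point at the same playlist, hence the dedup set.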
+ for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
+ video_url = stream.get(format_id + '_url')
+ if not video_url or video_url in video_urls:
+ continue
+ video_urls.add(video_url)
+ if format_id != 'rtmp':
+ m3u8_formats = self._extract_pscp_m3u8_formats(
+ video_url, token, format_id, state, width, height, False)
+ formats.extend(m3u8_formats)
+ continue
+ rtmp_format = {
+ 'url': video_url,
+ 'ext': 'flv' if format_id == 'rtmp' else 'mp4',
+ }
+            self._add_width_and_height(rtmp_format, width, height)
+ formats.append(rtmp_format)
+ self._sort_formats(formats)
+
+ info['formats'] = formats
+ return info
+
+
+class PeriscopeUserIE(PeriscopeBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$'
+ IE_DESC = 'Periscope user videos'
+ IE_NAME = 'periscope:user'
+
+ _TEST = {
+ 'url': 'https://www.periscope.tv/LularoeHusbandMike/',
+ 'info_dict': {
+ 'id': 'LularoeHusbandMike',
+ 'title': 'LULAROE HUSBAND MIKE',
+ 'description': 'md5:6cf4ec8047768098da58e446e82c82f0',
+ },
+        # Periscope only keeps videos from the last 24 hours, so the playlist
+        # may legitimately contain 0 videos
+ 'playlist_mincount': 0,
+ }
+
+ def _real_extract(self, url):
+ user_name = self._match_id(url)
+
+ webpage = self._download_webpage(url, user_name)
+
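+        # The profile page embeds its state as HTML-escaped JSON in a
+        # data-store attribute, which holds the user cache and the session
+        # token needed for the broadcast history API below.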
+ data_store = self._parse_json(
+ unescapeHTML(self._search_regex(
+ r'data-store=(["\'])(?P<data>.+?)\1',
+ webpage, 'data store', default='{}', group='data')),
+ user_name)
+
+ user = list(data_store['UserCache']['users'].values())[0]['user']
+ user_id = user['id']
+ session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id']
+
+ broadcasts = self._call_api(
+ 'getUserBroadcastsPublic',
+ {'user_id': user_id, 'session_id': session_id},
+ user_name)['broadcasts']
+
+ broadcast_ids = [
+ broadcast['id'] for broadcast in broadcasts if broadcast.get('id')]
+
+ title = user.get('display_name') or user.get('username') or user_name
+ description = user.get('description')
+
+ entries = [
+ self.url_result(
+ 'https://www.periscope.tv/%s/%s' % (user_name, broadcast_id))
+ for broadcast_id in broadcast_ids]
+
+ return self.playlist_result(entries, user_id, title, description)
diff --git a/hypervideo_dl/extractor/philharmoniedeparis.py b/hypervideo_dl/extractor/philharmoniedeparis.py
new file mode 100644
index 0000000..03da64b
--- /dev/null
+++ b/hypervideo_dl/extractor/philharmoniedeparis.py
@@ -0,0 +1,106 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ try_get,
+ urljoin,
+)
+
+
+class PhilharmonieDeParisIE(InfoExtractor):
+ IE_DESC = 'Philharmonie de Paris'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|embed(?:app)?/|misc/Playlist\.ashx\?id=)|
+ pad\.philharmoniedeparis\.fr/doc/CIMU/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'http://pad.philharmoniedeparis.fr/doc/CIMU/1086697/jazz-a-la-villette-knower',
+ 'md5': 'a0a4b195f544645073631cbec166a2c2',
+ 'info_dict': {
+ 'id': '1086697',
+ 'ext': 'mp4',
+ 'title': 'Jazz à la Villette : Knower',
+ },
+ }, {
+ 'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
+ 'info_dict': {
+ 'id': '1032066',
+ 'title': 'md5:0a031b81807b3593cffa3c9a87a167a0',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://live.philharmoniedeparis.fr/embedapp/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://live.philharmoniedeparis.fr/embed/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR',
+ 'only_matching': True,
+ }]
+ _LIVE_URL = 'https://live.philharmoniedeparis.fr'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ config = self._download_json(
+ '%s/otoPlayer/config.ashx' % self._LIVE_URL, video_id, query={
+ 'id': video_id,
+ 'lang': 'fr-FR',
+ })
+
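+        # config.ashx describes either a single video or a list of chapters;
+        # extract_entry() returns None when a source has no title or no usable
+        # HLS files, so callers must cope with a missing result.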
+ def extract_entry(source):
+ if not isinstance(source, dict):
+ return
+ title = source.get('title')
+ if not title:
+ return
+ files = source.get('files')
+ if not isinstance(files, dict):
+ return
+ format_urls = set()
+ formats = []
+ for format_id in ('mobile', 'desktop'):
+ format_url = try_get(
+ files, lambda x: x[format_id]['file'], compat_str)
+ if not format_url or format_url in format_urls:
+ continue
+ format_urls.add(format_url)
+ m3u8_url = urljoin(self._LIVE_URL, format_url)
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ if not formats:
+ return
+ self._sort_formats(formats)
+ return {
+ 'title': title,
+ 'formats': formats,
+ }
+
+ thumbnail = urljoin(self._LIVE_URL, config.get('image'))
+
+ info = extract_entry(config)
+ if info:
+ info.update({
+ 'id': video_id,
+ 'thumbnail': thumbnail,
+ })
+ return info
+
+ entries = []
+ for num, chapter in enumerate(config['chapters'], start=1):
+            entry = extract_entry(chapter)
+            # extract_entry() may return None for chapters without usable
+            # media; skip those instead of crashing on the assignment below
+            if entry is None:
+                continue
+            entry['id'] = '%s-%d' % (video_id, num)
+ entries.append(entry)
+
+ return self.playlist_result(entries, video_id, config.get('title'))
diff --git a/hypervideo_dl/extractor/phoenix.py b/hypervideo_dl/extractor/phoenix.py
new file mode 100644
index 0000000..e3ea014
--- /dev/null
+++ b/hypervideo_dl/extractor/phoenix.py
@@ -0,0 +1,133 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .youtube import YoutubeIE
+from .zdf import ZDFBaseIE
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ merge_dicts,
+ try_get,
+ unified_timestamp,
+ urljoin,
+)
+
+
+class PhoenixIE(ZDFBaseIE):
+ IE_NAME = 'phoenix.de'
+ _VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
+ _TESTS = [{
+ # Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
+ 'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
+ 'md5': '34ec321e7eb34231fd88616c65c92db0',
+ 'info_dict': {
+ 'id': '210222_phx_nachgehakt_corona_protest',
+ 'ext': 'mp4',
+ 'title': 'Wohin führt der Protest in der Pandemie?',
+ 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
+ 'duration': 1691,
+ 'timestamp': 1613902500,
+ 'upload_date': '20210221',
+ 'uploader': 'Phoenix',
+ 'series': 'corona nachgehakt',
+ 'episode': 'Wohin führt der Protest in der Pandemie?',
+ },
+ }, {
+ # Youtube embed
+ 'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
+ 'info_dict': {
+ 'id': 'hMQtqFYjomk',
+ 'ext': 'mp4',
+ 'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
+ 'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
+ 'duration': 3509,
+ 'upload_date': '20201219',
+ 'uploader': 'phoenix',
+ 'uploader_id': 'phoenix',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
+ 'only_matching': True,
+ }, {
+ # no media
+ 'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html',
+ 'only_matching': True,
+ }, {
+ # Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html
+ 'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+
+ article = self._download_json(
+ 'https://www.phoenix.de/response/id/%s' % article_id, article_id,
+ 'Downloading article JSON')
+
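+        # The article JSON is a list of content blocks ('absaetze', German for
+        # 'paragraphs'); the first block carries the video reference.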
+ video = article['absaetze'][0]
+ title = video.get('titel') or article.get('subtitel')
+
+ if video.get('typ') == 'video-youtube':
+ video_id = video['id']
+ return self.url_result(
+ video_id, ie=YoutubeIE.ie_key(), video_id=video_id,
+ video_title=title)
+
+ video_id = compat_str(video.get('basename') or video.get('content'))
+
+ details = self._download_json(
+ 'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
+ video_id, 'Downloading details JSON', query={
+ 'ak': 'web',
+ 'ptmd': 'true',
+ 'id': video_id,
+ 'profile': 'player2',
+ })
+
+ title = title or details['title']
+ content_id = details['tracking']['nielsen']['content']['assetid']
+
+ info = self._extract_ptmd(
+ 'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
+ content_id, None, url)
+
+ duration = int_or_none(try_get(
+ details, lambda x: x['tracking']['nielsen']['content']['length']))
+ timestamp = unified_timestamp(details.get('editorialDate'))
+ series = try_get(
+ details, lambda x: x['tracking']['nielsen']['content']['program'],
+ compat_str)
+ episode = title if details.get('contentType') == 'episode' else None
+
+ thumbnails = []
+ teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
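+        # layout keys encode the dimensions (e.g. '640x360'); they are parsed
+        # into width/height below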
+ for thumbnail_key, thumbnail_url in teaser_images.items():
+ thumbnail_url = urljoin(url, thumbnail_url)
+ if not thumbnail_url:
+ continue
+ thumbnail = {
+ 'url': thumbnail_url,
+ }
+ m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
+ if m:
+ thumbnail['width'] = int(m.group(1))
+ thumbnail['height'] = int(m.group(2))
+ thumbnails.append(thumbnail)
+
+ return merge_dicts(info, {
+ 'id': content_id,
+ 'title': title,
+ 'description': details.get('leadParagraph'),
+ 'duration': duration,
+ 'thumbnails': thumbnails,
+ 'timestamp': timestamp,
+ 'uploader': details.get('tvService'),
+ 'series': series,
+ 'episode': episode,
+ })
diff --git a/hypervideo_dl/extractor/photobucket.py b/hypervideo_dl/extractor/photobucket.py
new file mode 100644
index 0000000..6c8bbe1
--- /dev/null
+++ b/hypervideo_dl/extractor/photobucket.py
@@ -0,0 +1,46 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class PhotobucketIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
+ _TEST = {
+ 'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
+ 'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
+ 'info_dict': {
+ 'id': 'zpsc0c3b9fa',
+ 'ext': 'mp4',
+ 'timestamp': 1367669341,
+ 'upload_date': '20130504',
+ 'uploader': 'rachaneronas',
+ 'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ video_extension = mobj.group('ext')
+
+ webpage = self._download_webpage(url, video_id)
+
+ # Extract URL, uploader, and title from webpage
+ self.report_extraction(video_id)
+ info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
+ webpage, 'info json')
+ info = json.loads(info_json)
+ url = compat_urllib_parse_unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
+ return {
+ 'id': video_id,
+ 'url': url,
+ 'uploader': info['username'],
+ 'timestamp': info['creationDate'],
+ 'title': info['title'],
+ 'ext': video_extension,
+ 'thumbnail': info['thumbUrl'],
+ }
diff --git a/hypervideo_dl/extractor/picarto.py b/hypervideo_dl/extractor/picarto.py
new file mode 100644
index 0000000..e6c51e1
--- /dev/null
+++ b/hypervideo_dl/extractor/picarto.py
@@ -0,0 +1,127 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ js_to_json,
+)
+
+
+class PicartoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
+ _TEST = {
+ 'url': 'https://picarto.tv/Setz',
+ 'info_dict': {
+ 'id': 'Setz',
+ 'ext': 'mp4',
+ 'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'timestamp': int,
+ 'is_live': True
+ },
+ 'skip': 'Stream is offline',
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
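+        # ptvintern.picarto.tv is a GraphQL endpoint: a single query yields the
+        # channel metadata plus the load balancer URL serving the stream info.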
+ data = self._download_json(
+ 'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
+ 'query': '''{
+ channel(name: "%s") {
+ adult
+ id
+ online
+ stream_name
+ title
+ }
+ getLoadBalancerUrl(channel_name: "%s") {
+ url
+ }
+}''' % (channel_id, channel_id),
+ })['data']
+ metadata = data['channel']
+
+ if metadata.get('online') == 0:
+ raise ExtractorError('Stream is offline', expected=True)
+ title = metadata['title']
+
+ cdn_data = self._download_json(
+ data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
+ channel_id, 'Downloading load balancing info')
+
+ formats = []
+ for source in (cdn_data.get('source') or []):
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ source_type = source.get('type')
+ if source_type == 'html5/application/vnd.apple.mpegurl':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
+ elif source_type == 'html5/video/mp4':
+ formats.append({
+ 'url': source_url,
+ })
+ self._sort_formats(formats)
+
+ mature = metadata.get('adult')
+ if mature is None:
+ age_limit = None
+ else:
+ age_limit = 18 if mature is True else 0
+
+ return {
+ 'id': channel_id,
+ 'title': self._live_title(title.strip()),
+ 'is_live': True,
+ 'channel': channel_id,
+ 'channel_id': metadata.get('id'),
+ 'channel_url': 'https://picarto.tv/%s' % channel_id,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
+
+
+class PicartoVodIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
+ 'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
+ 'info_dict': {
+ 'id': 'ArtofZod_2017.12.12.00.13.23.flv',
+ 'ext': 'mp4',
+ 'title': 'ArtofZod_2017.12.12.00.13.23.flv',
+ 'thumbnail': r're:^https?://.*\.jpg'
+ },
+ }, {
+ 'url': 'https://picarto.tv/videopopout/Plague',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ vod_info = self._parse_json(
+ self._search_regex(
+ r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
+ video_id),
+ video_id, transform_source=js_to_json)
+
+ formats = self._extract_m3u8_formats(
+ vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'thumbnail': vod_info.get('vodThumb'),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/piksel.py b/hypervideo_dl/extractor/piksel.py
new file mode 100644
index 0000000..ecf56ff
--- /dev/null
+++ b/hypervideo_dl/extractor/piksel.py
@@ -0,0 +1,187 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+ unescapeHTML,
+)
+
+
+class PikselIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://
+ (?:
+ (?:
+ player\.
+ (?:
+ olympusattelecom|
+ vibebyvista
+ )|
+ (?:api|player)\.multicastmedia|
+ (?:api-ovp|player)\.piksel
+ )\.com|
+ (?:
+ mz-edge\.stream\.co|
+ movie-s\.nhk\.or
+ )\.jp|
+ vidego\.baltimorecity\.gov
+ )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
+ _TESTS = [
+ {
+ 'url': 'http://player.piksel.com/v/ums2867l',
+ 'md5': '34e34c8d89dc2559976a6079db531e85',
+ 'info_dict': {
+ 'id': 'ums2867l',
+ 'ext': 'mp4',
+ 'title': 'GX-005 with Caption',
+ 'timestamp': 1481335659,
+ 'upload_date': '20161210'
+ }
+ },
+ {
+ # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al
+ 'url': 'https://player.piksel.com/v/v80kqp41',
+ 'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d',
+ 'info_dict': {
+ 'id': 'v80kqp41',
+ 'ext': 'mp4',
+ 'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
+ 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
+ 'timestamp': 1486171129,
+ 'upload_date': '20170204'
+ }
+ },
+ {
+ # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
+ 'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
+ 'only_matching': True,
+ }
+ ]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)',
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _call_api(self, app_token, resource, display_id, query, fatal=True):
+ response = (self._download_json(
+ 'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token),
+ display_id, query=query, fatal=fatal) or {}).get('response')
+ failure = try_get(response, lambda x: x['failure']['reason'])
+ if failure:
+ if fatal:
+ raise ExtractorError(failure, expected=True)
+ self.report_warning(failure)
+ return response
+
+ def _real_extract(self, url):
+ ref_id, display_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, display_id)
+ app_token = self._search_regex([
+ r'clientAPI\s*:\s*"([^"]+)"',
+ r'data-de-api-key\s*=\s*"([^"]+)"'
+ ], webpage, 'app token')
+ query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id}
+ program = self._call_api(
+ app_token, 'program', display_id, query)['WsProgramResponse']['program']
+ video_id = program['uuid']
+ video_data = program['asset']
+ title = video_data['title']
+ asset_type = dict_get(video_data, ['assetType', 'asset_type'])
+
+ formats = []
+
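+        # Progressive asset files report audio and video bitrates separately
+        # (divided by 1024 to get kbit/s); the total bitrate depends on the
+        # asset type.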
+ def process_asset_file(asset_file):
+ if not asset_file:
+ return
+ # TODO: extract rtmp formats
+ http_url = asset_file.get('http_url')
+ if not http_url:
+ return
+ tbr = None
+ vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
+ abr = int_or_none(asset_file.get('audioBitrate'), 1024)
+            if asset_type == 'video':
+                # either bitrate may be missing; treat absent values as 0
+                tbr = (vbr or 0) + (abr or 0) or None
+ elif asset_type == 'audio':
+ tbr = abr
+
+ format_id = ['http']
+ if tbr:
+ format_id.append(compat_str(tbr))
+
+ formats.append({
+ 'format_id': '-'.join(format_id),
+ 'url': unescapeHTML(http_url),
+ 'vbr': vbr,
+ 'abr': abr,
+ 'width': int_or_none(asset_file.get('videoWidth')),
+ 'height': int_or_none(asset_file.get('videoHeight')),
+ 'filesize': int_or_none(asset_file.get('filesize')),
+ 'tbr': tbr,
+ })
+
+ def process_asset_files(asset_files):
+ for asset_file in (asset_files or []):
+ process_asset_file(asset_file)
+
+ process_asset_files(video_data.get('assetFiles'))
+ process_asset_file(video_data.get('referenceFile'))
+ if not formats:
+ asset_id = video_data.get('assetid') or program.get('assetid')
+ if asset_id:
+ process_asset_files(try_get(self._call_api(
+ app_token, 'asset_file', display_id, {
+ 'assetid': asset_id,
+ }, False), lambda x: x['WsAssetFileResponse']['AssetFiles']))
+
+ m3u8_url = dict_get(video_data, [
+ 'm3u8iPadURL',
+ 'ipadM3u8Url',
+ 'm3u8AndroidURL',
+ 'm3u8iPhoneURL',
+ 'iphoneM3u8Url'])
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil'])
+ if smil_url:
+ transform_source = None
+ if ref_id == 'nhkworld':
+ # TODO: figure out if this is something to be fixed in urljoin,
+ # _parse_smil_formats or keep it here
+ transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"')
+ formats.extend(self._extract_smil_formats(
+ re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id,
+ transform_source=transform_source, fatal=False))
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for caption in video_data.get('captions', []):
+ caption_url = caption.get('url')
+ if caption_url:
+ subtitles.setdefault(caption.get('locale', 'en'), []).append({
+ 'url': caption_url})
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnailUrl'),
+ 'timestamp': parse_iso8601(video_data.get('dateadd')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/pinkbike.py b/hypervideo_dl/extractor/pinkbike.py
new file mode 100644
index 0000000..9f3501f
--- /dev/null
+++ b/hypervideo_dl/extractor/pinkbike.py
@@ -0,0 +1,97 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ remove_end,
+ remove_start,
+ str_to_int,
+ unified_strdate,
+)
+
+
+class PinkbikeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?pinkbike\.com/video/|es\.pinkbike\.org/i/kvid/kvid-y5\.swf\?id=)(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.pinkbike.com/video/402811/',
+ 'md5': '4814b8ca7651034cd87e3361d5c2155a',
+ 'info_dict': {
+ 'id': '402811',
+ 'ext': 'mp4',
+ 'title': 'Brandon Semenuk - RAW 100',
+ 'description': 'Official release: www.redbull.ca/rupertwalker',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 100,
+ 'upload_date': '20150406',
+ 'uploader': 'revelco',
+ 'location': 'Victoria, British Columbia, Canada',
+ 'view_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ 'url': 'http://es.pinkbike.org/i/kvid/kvid-y5.swf?id=406629',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://www.pinkbike.com/video/%s' % video_id, video_id)
+
+ formats = []
+ for _, format_id, src in re.findall(
+ r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', webpage):
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None))
+ formats.append({
+ 'url': src,
+ 'format_id': format_id,
+ 'height': height,
+ })
+ self._sort_formats(formats)
+
+ title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike')
+ description = self._html_search_regex(
+ r'(?s)id="media-description"[^>]*>(.+?)<',
+ webpage, 'description', default=None) or remove_start(
+ self._og_search_description(webpage), title + '. ')
+ thumbnail = self._og_search_thumbnail(webpage)
+ duration = int_or_none(self._html_search_meta(
+ 'video:duration', webpage, 'duration'))
+
+ uploader = self._search_regex(
+ r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage,
+ 'uploader', fatal=False)
+ upload_date = unified_strdate(self._search_regex(
+ r'class="fullTime"[^>]+title="([^"]+)"',
+ webpage, 'upload date', fatal=False))
+
+ location = self._html_search_regex(
+ r'(?s)<dt>Location</dt>\s*<dd>(.+?)<',
+ webpage, 'location', fatal=False)
+
+ def extract_count(webpage, label):
+ return str_to_int(self._search_regex(
+ r'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>%s' % label,
+ webpage, label, fatal=False))
+
+ view_count = extract_count(webpage, 'Views')
+ comment_count = extract_count(webpage, 'Comments')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'location': location,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'formats': formats
+ }
diff --git a/hypervideo_dl/extractor/pinterest.py b/hypervideo_dl/extractor/pinterest.py
new file mode 100644
index 0000000..42528d7
--- /dev/null
+++ b/hypervideo_dl/extractor/pinterest.py
@@ -0,0 +1,203 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class PinterestBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:[^/]+\.)?pinterest\.(?:com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)'
+
+ def _call_api(self, resource, video_id, options):
+ return self._download_json(
+ 'https://www.pinterest.com/resource/%sResource/get/' % resource,
+            video_id, 'Downloading %s JSON metadata' % resource, query={
+ 'data': json.dumps({'options': options})
+ })['resource_response']
+
+ def _extract_video(self, data, extract_formats=True):
+ video_id = data['id']
+
+ title = (data.get('title') or data.get('grid_title') or video_id).strip()
+
+ urls = []
+ formats = []
+ duration = None
+ if extract_formats:
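+            # video_list maps rendition ids to their metadata; HLS renditions
+            # are expanded into individual formats, anything else is kept as a
+            # progressive download.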
+ for format_id, format_dict in data['videos']['video_list'].items():
+ if not isinstance(format_dict, dict):
+ continue
+ format_url = url_or_none(format_dict.get('url'))
+ if not format_url or format_url in urls:
+ continue
+ urls.append(format_url)
+ duration = float_or_none(format_dict.get('duration'), scale=1000)
+ ext = determine_ext(format_url)
+ if 'hls' in format_id.lower() or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'width': int_or_none(format_dict.get('width')),
+ 'height': int_or_none(format_dict.get('height')),
+ 'duration': duration,
+ })
+ self._sort_formats(
+ formats, field_preference=('height', 'width', 'tbr', 'format_id'))
+
+ description = data.get('description') or data.get('description_html') or data.get('seo_description')
+ timestamp = unified_timestamp(data.get('created_at'))
+
+ def _u(field):
+ return try_get(data, lambda x: x['closeup_attribution'][field], compat_str)
+
+ uploader = _u('full_name')
+ uploader_id = _u('id')
+
+ repost_count = int_or_none(data.get('repin_count'))
+ comment_count = int_or_none(data.get('comment_count'))
+ categories = try_get(data, lambda x: x['pin_join']['visual_annotation'], list)
+ tags = data.get('hashtags')
+
+ thumbnails = []
+ images = data.get('images')
+ if isinstance(images, dict):
+ for thumbnail_id, thumbnail in images.items():
+ if not isinstance(thumbnail, dict):
+ continue
+ thumbnail_url = url_or_none(thumbnail.get('url'))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'thumbnails': thumbnails,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'repost_count': repost_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ 'tags': tags,
+ 'formats': formats,
+ 'extractor_key': PinterestIE.ie_key(),
+ }
+
+
+class PinterestIE(PinterestBaseIE):
+ _VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE
+ _TESTS = [{
+ 'url': 'https://www.pinterest.com/pin/664281013778109217/',
+ 'md5': '6550c2af85d6d9f3fe3b88954d1577fc',
+ 'info_dict': {
+ 'id': '664281013778109217',
+ 'ext': 'mp4',
+ 'title': 'Origami',
+ 'description': 'md5:b9d90ddf7848e897882de9e73344f7dd',
+ 'duration': 57.7,
+ 'timestamp': 1593073622,
+ 'upload_date': '20200625',
+ 'uploader': 'Love origami -I am Dafei',
+ 'uploader_id': '586523688879454212',
+ 'repost_count': 50,
+ 'comment_count': 0,
+ 'categories': list,
+ 'tags': list,
+ },
+ }, {
+ 'url': 'https://co.pinterest.com/pin/824721750502199491/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._call_api(
+ 'Pin', video_id, {
+ 'field_set_key': 'unauth_react_main_pin',
+ 'id': video_id,
+ })['data']
+ return self._extract_video(data)
+
+
+class PinterestCollectionIE(PinterestBaseIE):
+ _VALID_URL = r'%s/(?P<username>[^/]+)/(?P<id>[^/?#&]+)' % PinterestBaseIE._VALID_URL_BASE
+ _TESTS = [{
+ 'url': 'https://www.pinterest.ca/mashal0407/cool-diys/',
+ 'info_dict': {
+ 'id': '585890301462791043',
+ 'title': 'cool diys',
+ },
+ 'playlist_count': 8,
+ }, {
+ 'url': 'https://www.pinterest.ca/fudohub/videos/',
+ 'info_dict': {
+ 'id': '682858430939307450',
+ 'title': 'VIDEOS',
+ },
+ 'playlist_mincount': 365,
+ 'skip': 'Test with extract_formats=False',
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PinterestIE.suitable(url) else super(
+ PinterestCollectionIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ username, slug = re.match(self._VALID_URL, url).groups()
+ board = self._call_api(
+ 'Board', slug, {
+ 'slug': slug,
+ 'username': username
+ })['data']
+ board_id = board['id']
+ options = {
+ 'board_id': board_id,
+ 'page_size': 250,
+ }
+ bookmark = None
+ entries = []
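+        # BoardFeed is paginated with opaque bookmark tokens; keep requesting
+        # pages until the response no longer returns one.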
+ while True:
+ if bookmark:
+ options['bookmarks'] = [bookmark]
+ board_feed = self._call_api('BoardFeed', board_id, options)
+ for item in (board_feed.get('data') or []):
+ if not isinstance(item, dict) or item.get('type') != 'pin':
+ continue
+ video_id = item.get('id')
+ if video_id:
+ # Some pins may not be available anonymously via pin URL
+ # video = self._extract_video(item, extract_formats=False)
+ # video.update({
+ # '_type': 'url_transparent',
+ # 'url': 'https://www.pinterest.com/pin/%s/' % video_id,
+ # })
+ # entries.append(video)
+ entries.append(self._extract_video(item))
+ bookmark = board_feed.get('bookmark')
+ if not bookmark:
+ break
+ return self.playlist_result(
+ entries, playlist_id=board_id, playlist_title=board.get('name'))
diff --git a/hypervideo_dl/extractor/pladform.py b/hypervideo_dl/extractor/pladform.py
new file mode 100644
index 0000000..e86c653
--- /dev/null
+++ b/hypervideo_dl/extractor/pladform.py
@@ -0,0 +1,125 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ xpath_text,
+ qualities,
+)
+
+
+class PladformIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:
+ out\.pladform\.ru/player|
+ static\.pladform\.ru/player\.swf
+ )
+ \?.*\bvideoid=|
+ video\.pladform\.ru/catalog/video/videoid/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0',
+ 'md5': '53362fac3a27352da20fa2803cc5cd6f',
+ 'info_dict': {
+ 'id': '3777899',
+ 'ext': 'mp4',
+ 'title': 'СТУДИЯ СОЮЗ • Шоу Студия Союз, 24 выпуск (01.02.2018) Нурлан Сабуров и Слава Комиссаренко',
+ 'description': 'md5:05140e8bf1b7e2d46e7ba140be57fd95',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 3190,
+ },
+ }, {
+ 'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ pl = qs.get('pl', ['1'])[0]
+
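+        # getVideo answers with XML: <src> nodes carry the media URLs (plain
+        # or HLS), while an error tag or a cap node signals an unavailable
+        # video.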
+ video = self._download_xml(
+ 'http://out.pladform.ru/getVideo', video_id, query={
+ 'pl': pl,
+ 'videoid': video_id,
+ })
+
+ def fail(text):
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, text),
+ expected=True)
+
+ if video.tag == 'error':
+ fail(video.text)
+
+ quality = qualities(('ld', 'sd', 'hd'))
+
+ formats = []
+ for src in video.findall('./src'):
+ if src is None:
+ continue
+ format_url = src.text
+ if not format_url:
+ continue
+ if src.get('type') == 'hls' or determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+                    'url': format_url,
+ 'format_id': src.get('quality'),
+ 'quality': quality(src.get('quality')),
+ })
+
+ if not formats:
+ error = xpath_text(video, './cap', 'error', default=None)
+ if error:
+ fail(error)
+
+ self._sort_formats(formats)
+
+ webpage = self._download_webpage(
+ 'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
+ video_id)
+
+ title = self._og_search_title(webpage, fatal=False) or xpath_text(
+ video, './/title', 'title', fatal=True)
+ description = self._search_regex(
+ r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
+ video, './/cover', 'cover')
+
+ duration = int_or_none(xpath_text(video, './/time', 'duration'))
+ age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/platzi.py b/hypervideo_dl/extractor/platzi.py
new file mode 100644
index 0000000..23c8256
--- /dev/null
+++ b/hypervideo_dl/extractor/platzi.py
@@ -0,0 +1,224 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_str,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class PlatziBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://platzi.com/login/'
+ _NETRC_MACHINE = 'platzi'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'email': username,
+ 'password': password,
+ })
+
+ urlh = self._request_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form),
+ headers={'Referer': self._LOGIN_URL})
+
+        # no redirect back to the login page means the login succeeded
+ if 'platzi.com/login' not in urlh.geturl():
+ return
+
+ login_error = self._webpage_read_content(
+ urlh, self._LOGIN_URL, None, 'Downloading login error page')
+
+ login = self._parse_json(
+ self._search_regex(
+ r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', login_error, 'login'),
+ None)
+
+ for kind in ('error', 'password', 'nonFields'):
+ error = str_or_none(login.get('%sError' % kind))
+ if error:
+ raise ExtractorError(
+ 'Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class PlatziIE(PlatziBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ platzi\.com/clases| # es version
+ courses\.platzi\.com/classes # en version
+ )/[^/]+/(?P<id>\d+)-[^/?\#&]+
+ '''
+
+ _TESTS = [{
+ 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
+ 'md5': '8f56448241005b561c10f11a595b37e3',
+ 'info_dict': {
+ 'id': '12074',
+ 'ext': 'mp4',
+ 'title': 'Creando nuestra primera página',
+ 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
+ 'duration': 420,
+ },
+ 'skip': 'Requires platzi account credentials',
+ }, {
+ 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
+ 'info_dict': {
+ 'id': '13430',
+ 'ext': 'mp4',
+ 'title': 'Background',
+ 'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
+ 'duration': 360,
+ },
+ 'skip': 'Requires platzi account credentials',
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ lecture_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, lecture_id)
+
+ data = self._parse_json(
+ self._search_regex(
+ # client_data may contain "};" so that we have to try more
+ # strict regex first
+ (r'client_data\s*=\s*({.+?})\s*;\s*\n',
+ r'client_data\s*=\s*({.+?})\s*;'),
+ webpage, 'client data'),
+ lecture_id)
+
+ material = data['initialState']['material']
+ desc = material['description']
+ title = desc['title']
+
+ formats = []
+ for server_id, server in material['videos'].items():
+ if not isinstance(server, dict):
+ continue
+ for format_id in ('hls', 'dash'):
+ format_url = url_or_none(server.get(format_id))
+ if not format_url:
+ continue
+ if format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, lecture_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id=format_id,
+ note='Downloading %s m3u8 information' % server_id,
+ fatal=False))
+ elif format_id == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ format_url, lecture_id, mpd_id=format_id,
+ note='Downloading %s MPD manifest' % server_id,
+ fatal=False))
+ self._sort_formats(formats)
+
+ content = str_or_none(desc.get('content'))
+ description = (clean_html(compat_b64decode(content).decode('utf-8'))
+ if content else None)
+ duration = int_or_none(material.get('duration'), invscale=60)
+
+ return {
+ 'id': lecture_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class PlatziCourseIE(PlatziBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ platzi\.com/clases| # es version
+ courses\.platzi\.com/classes # en version
+ )/(?P<id>[^/?\#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://platzi.com/clases/next-js/',
+ 'info_dict': {
+ 'id': '1311',
+ 'title': 'Curso de Next.js',
+ },
+ 'playlist_count': 22,
+ }, {
+ 'url': 'https://courses.platzi.com/classes/communication-codestream/',
+ 'info_dict': {
+ 'id': '1367',
+ 'title': 'Codestream Course',
+ },
+ 'playlist_count': 14,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ course_name = self._match_id(url)
+
+ webpage = self._download_webpage(url, course_name)
+
+ props = self._parse_json(
+ self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'),
+ course_name)['initialProps']
+
+ entries = []
+ for chapter_num, chapter in enumerate(props['concepts'], 1):
+ if not isinstance(chapter, dict):
+ continue
+ materials = chapter.get('materials')
+ if not materials or not isinstance(materials, list):
+ continue
+ chapter_title = chapter.get('title')
+ chapter_id = str_or_none(chapter.get('id'))
+ for material in materials:
+ if not isinstance(material, dict):
+ continue
+ if material.get('material_type') != 'video':
+ continue
+ video_url = urljoin(url, material.get('url'))
+ if not video_url:
+ continue
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': video_url,
+ 'title': str_or_none(material.get('name')),
+ 'id': str_or_none(material.get('id')),
+ 'ie_key': PlatziIE.ie_key(),
+ 'chapter': chapter_title,
+ 'chapter_number': chapter_num,
+ 'chapter_id': chapter_id,
+ })
+
+ course_id = compat_str(try_get(props, lambda x: x['course']['id']))
+ course_title = try_get(props, lambda x: x['course']['name'], compat_str)
+
+ return self.playlist_result(entries, course_id, course_title)
diff --git a/hypervideo_dl/extractor/playfm.py b/hypervideo_dl/extractor/playfm.py
new file mode 100644
index 0000000..e766ccc
--- /dev/null
+++ b/hypervideo_dl/extractor/playfm.py
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class PlayFMIE(InfoExtractor):
+ IE_NAME = 'play.fm'
+ _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])'
+
+ _TEST = {
+ 'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12',
+ 'md5': 'c505f8307825a245d0c7ad1850001f22',
+ 'info_dict': {
+ 'id': '71276',
+ 'ext': 'mp3',
+ 'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
+ 'description': '',
+ 'duration': 5627,
+ 'timestamp': 1406033781,
+ 'upload_date': '20140722',
+ 'uploader': 'Dan Drastic',
+ 'uploader_id': '71170',
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ slug = mobj.group('slug')
+
+ recordings = self._download_json(
+ 'http://v2api.play.fm/recordings/slug/%s' % slug, video_id)
+
+ error = recordings.get('error')
+ if isinstance(error, dict):
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error.get('message')),
+ expected=True)
+
+ audio_url = recordings['audio']
+ video_id = compat_str(recordings.get('id') or video_id)
+ title = recordings['title']
+ description = recordings.get('description')
+ duration = int_or_none(recordings.get('recordingDuration'))
+ timestamp = parse_iso8601(recordings.get('created_at'))
+ uploader = recordings.get('page', {}).get('title')
+ uploader_id = compat_str(recordings.get('page', {}).get('id'))
+ view_count = int_or_none(recordings.get('playCount'))
+ comment_count = int_or_none(recordings.get('commentCount'))
+ categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')]
+
+ return {
+ 'id': video_id,
+ 'url': audio_url,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ }
diff --git a/hypervideo_dl/extractor/playplustv.py b/hypervideo_dl/extractor/playplustv.py
new file mode 100644
index 0000000..1e30ab2
--- /dev/null
+++ b/hypervideo_dl/extractor/playplustv.py
@@ -0,0 +1,109 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ PUTRequest,
+)
+
+
+class PlayPlusTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})'
+ _TEST = {
+ 'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e',
+ 'md5': 'd078cb89d7ab6b9df37ce23c647aef72',
+ 'info_dict': {
+ 'id': 'db8d274a5163424e967f35a30ddafb8e',
+ 'ext': 'mp4',
+ 'title': 'Capítulo 179 - Final',
+ 'description': 'md5:01085d62d8033a1e34121d3c3cabc838',
+ 'timestamp': 1529992740,
+ 'upload_date': '20180626',
+ },
+ 'skip': 'Requires account credential',
+ }
+ _NETRC_MACHINE = 'playplustv'
+ _GEO_COUNTRIES = ['BR']
+ _token = None
+    _profile = None
+
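+    # The resource name is appended to the bare '.../get' path, yielding
+    # endpoints such as getProfiles and getMedia.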
+ def _call_api(self, resource, video_id=None, query=None):
+ return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={
+ 'Authorization': 'Bearer ' + self._token,
+ }, query=query)
+
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ self.raise_login_required()
+
+ req = PUTRequest(
+ 'https://api.playplus.tv/api/web/login', json.dumps({
+ 'email': email,
+ 'password': password,
+ }).encode(), {
+ 'Content-Type': 'application/json; charset=utf-8',
+ })
+
+ try:
+ self._token = self._download_json(req, None)['token']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ raise ExtractorError(self._parse_json(
+ e.cause.read(), None)['errorMessage'], expected=True)
+ raise
+
+ self._profile = self._call_api('Profiles')['list'][0]['_id']
+
+ def _real_extract(self, url):
+ project_id, media_id = re.match(self._VALID_URL, url).groups()
+ media = self._call_api(
+ 'Media', media_id, {
+ 'profileId': self._profile,
+ 'projectId': project_id,
+ 'mediaId': media_id,
+ })['obj']
+ title = media['title']
+
+ formats = []
+ for f in media.get('files', []):
+ f_url = f.get('url')
+ if not f_url:
+ continue
+ file_info = f.get('fileInfo') or {}
+ formats.append({
+ 'url': f_url,
+ 'width': int_or_none(file_info.get('width')),
+ 'height': int_or_none(file_info.get('height')),
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for thumb in media.get('thumbs', []):
+ thumb_url = thumb.get('url')
+ if not thumb_url:
+ continue
+ thumbnails.append({
+ 'url': thumb_url,
+ 'width': int_or_none(thumb.get('width')),
+ 'height': int_or_none(thumb.get('height')),
+ })
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': clean_html(media.get('description')) or media.get('shortDescription'),
+ 'timestamp': int_or_none(media.get('publishDate'), 1000),
+ 'view_count': int_or_none(media.get('numberOfViews')),
+ 'comment_count': int_or_none(media.get('numberOfComments')),
+ 'tags': media.get('tags'),
+ }
diff --git a/hypervideo_dl/extractor/plays.py b/hypervideo_dl/extractor/plays.py
new file mode 100644
index 0000000..ddfc6f1
--- /dev/null
+++ b/hypervideo_dl/extractor/plays.py
@@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class PlaysTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P<id>[0-9a-f]{18})'
+ _TESTS = [{
+ 'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
+ 'md5': 'dfeac1198506652b5257a62762cec7bc',
+ 'info_dict': {
+ 'id': '56af17f56c95335490',
+ 'ext': 'mp4',
+ 'title': 'Bjergsen - When you outplay the Azir wall',
+ 'description': 'Posted by Bjergsen',
+ }
+ }, {
+ 'url': 'https://plays.tv/embeds/56af17f56c95335490',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'https://plays.tv/video/%s' % video_id, video_id)
+
+        info = self._search_json_ld(webpage, video_id)
+
+ mpd_url, sources = re.search(
+ r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>',
+ webpage).groups()
+ formats = self._extract_mpd_formats(
+ self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
+ for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
+ formats.append({
+ 'url': self._proto_relative_url(format_url),
+ 'format_id': 'http-' + format_id,
+ 'height': int_or_none(height),
+ })
+ self._sort_formats(formats)
+
+ info.update({
+ 'id': video_id,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': info.get('thumbnail') or self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ })
+
+ return info
diff --git a/hypervideo_dl/extractor/playstuff.py b/hypervideo_dl/extractor/playstuff.py
new file mode 100644
index 0000000..5a32995
--- /dev/null
+++ b/hypervideo_dl/extractor/playstuff.py
@@ -0,0 +1,66 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ smuggle_url,
+ try_get,
+)
+
+
+class PlayStuffIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?play\.stuff\.co\.nz/details/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://play.stuff.co.nz/details/608778ac1de1c4001a3fa09a',
+ 'md5': 'c82d3669e5247c64bc382577843e5bd0',
+ 'info_dict': {
+ 'id': '6250584958001',
+ 'ext': 'mp4',
+ 'title': 'Episode 1: Rotorua/Mt Maunganui/Tauranga',
+ 'description': 'md5:c154bafb9f0dd02d01fd4100fb1c1913',
+ 'uploader_id': '6005208634001',
+ 'timestamp': 1619491027,
+ 'upload_date': '20210427',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }, {
+ # geo restricted, bypassable
+ 'url': 'https://play.stuff.co.nz/details/_6155660351001',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ state = self._parse_json(
+ self._search_regex(
+ r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'state'),
+ video_id)
+
+ account_id = try_get(
+ state, lambda x: x['configurations']['accountId'],
+ compat_str) or '6005208634001'
+ player_id = try_get(
+ state, lambda x: x['configurations']['playerId'],
+ compat_str) or 'default'
+
+ entries = []
+ for item_id, video in state['items'].items():
+ if not isinstance(video, dict):
+ continue
+ asset_id = try_get(
+ video, lambda x: x['content']['attributes']['assetId'],
+ compat_str)
+ if not asset_id:
+ continue
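+ # Smuggle the geo countries through so the Brightcove extractor can apply its geo-restriction bypass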
+ entries.append(self.url_result(
+ smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, asset_id),
+ {'geo_countries': ['NZ']}),
+ 'BrightcoveNew', video_id))
+
+ return self.playlist_result(entries, video_id)
diff --git a/hypervideo_dl/extractor/playtvak.py b/hypervideo_dl/extractor/playtvak.py
new file mode 100644
index 0000000..4c5f579
--- /dev/null
+++ b/hypervideo_dl/extractor/playtvak.py
@@ -0,0 +1,193 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urlparse,
+ compat_urllib_parse_urlencode,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ qualities,
+)
+
+
+class PlaytvakIE(InfoExtractor):
+ IE_DESC = 'Playtvak.cz, iDNES.cz and Lidovky.cz'
+ _VALID_URL = r'https?://(?:.+?\.)?(?:playtvak|idnes|lidovky|metro)\.cz/.*\?(?:c|idvideo)=(?P<id>[^&]+)'
+ _TESTS = [{
+ 'url': 'http://www.playtvak.cz/vyzente-vosy-a-srsne-ze-zahrady-dn5-/hodinovy-manzel.aspx?c=A150730_150323_hodinovy-manzel_kuko',
+ 'md5': '4525ae312c324b4be2f4603cc78ceb4a',
+ 'info_dict': {
+ 'id': 'A150730_150323_hodinovy-manzel_kuko',
+ 'ext': 'mp4',
+ 'title': 'Vyžeňte vosy a sršně ze zahrady',
+ 'description': 'md5:4436e61b7df227a093778efb7e373571',
+ 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
+ 'duration': 279,
+ 'timestamp': 1438732860,
+ 'upload_date': '20150805',
+ 'is_live': False,
+ }
+ }, { # live video test
+ 'url': 'http://slowtv.playtvak.cz/planespotting-0pr-/planespotting.aspx?c=A150624_164934_planespotting_cat',
+ 'info_dict': {
+ 'id': 'A150624_164934_planespotting_cat',
+ 'ext': 'flv',
+ 'title': 're:^Planespotting [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True, # requires rtmpdump
+ },
+ }, { # another live stream, this one without Misc.videoFLV
+ 'url': 'https://slowtv.playtvak.cz/zive-sledujte-vlaky-v-primem-prenosu-dwi-/hlavni-nadrazi.aspx?c=A151218_145728_hlavni-nadrazi_plap',
+ 'info_dict': {
+ 'id': 'A151218_145728_hlavni-nadrazi_plap',
+ 'ext': 'flv',
+ 'title': 're:^Hlavní nádraží [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True, # requires rtmpdump
+ },
+ }, { # idnes.cz
+ 'url': 'http://zpravy.idnes.cz/pes-zavreny-v-aute-rozbijeni-okynek-v-aute-fj5-/domaci.aspx?c=A150809_104116_domaci_pku',
+ 'md5': '819832ba33cd7016e58a6658577fe289',
+ 'info_dict': {
+ 'id': 'A150809_104116_domaci_pku',
+ 'ext': 'mp4',
+ 'title': 'Zavřeli jsme mraženou pizzu do auta. Upekla se',
+ 'description': 'md5:01e73f02329e2e5760bd5eed4d42e3c2',
+ 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
+ 'duration': 39,
+ 'timestamp': 1438969140,
+ 'upload_date': '20150807',
+ 'is_live': False,
+ }
+ }, { # lidovky.cz
+ 'url': 'http://www.lidovky.cz/dalsi-demonstrace-v-praze-o-migraci-duq-/video.aspx?c=A150808_214044_ln-video_ELE',
+ 'md5': 'c7209ac4ba9d234d4ad5bab7485bcee8',
+ 'info_dict': {
+ 'id': 'A150808_214044_ln-video_ELE',
+ 'ext': 'mp4',
+ 'title': 'Táhni! Demonstrace proti imigrantům budila emoce',
+ 'description': 'md5:97c81d589a9491fbfa323c9fa3cca72c',
+ 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
+ 'timestamp': 1439052180,
+ 'upload_date': '20150808',
+ 'is_live': False,
+ }
+ }, { # metro.cz
+ 'url': 'http://www.metro.cz/video-pod-billboardem-se-na-vltavske-roztocil-kolotoc-deti-vozil-jen-par-hodin-1hx-/metro-extra.aspx?c=A141111_173251_metro-extra_row',
+ 'md5': '84fc1deedcac37b7d4a6ccae7c716668',
+ 'info_dict': {
+ 'id': 'A141111_173251_metro-extra_row',
+ 'ext': 'mp4',
+ 'title': 'Recesisté udělali z billboardu kolotoč',
+ 'description': 'md5:7369926049588c3989a66c9c1a043c4c',
+ 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
+ 'timestamp': 1415725500,
+ 'upload_date': '20141111',
+ 'is_live': False,
+ }
+ }, {
+ 'url': 'http://www.playtvak.cz/embed.aspx?idvideo=V150729_141549_play-porad_kuko',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ info_url = self._html_search_regex(
+ r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')
+
+ parsed_url = compat_urlparse.urlparse(info_url)
+
+ qs = compat_urlparse.parse_qs(parsed_url.query)
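+ # 'reklama' is Czech for advert; reklama=0 presumably suppresses ad entries, type=js selects the JSON-style payload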
+ qs.update({
+ 'reklama': ['0'],
+ 'type': ['js'],
+ })
+
+ info_url = compat_urlparse.urlunparse(
+ parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
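+ # The response is wrapped in JavaScript; slice out the JSON object between the outermost braces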
+ json_info = self._download_json(
+ info_url, video_id,
+ transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
+
+ item = None
+ for i in json_info['items']:
+ if i.get('type') in ('video', 'stream'):
+ item = i
+ break
+ if not item:
+ raise ExtractorError('No suitable stream found')
+
+ quality = qualities(('low', 'middle', 'high'))
+
+ formats = []
+ for fmt in item['video']:
+ video_url = fmt.get('file')
+ if not video_url:
+ continue
+
+ format_ = fmt['format']
+ format_id = '%s_%s' % (format_, fmt['quality'])
+ preference = None
+
+ if format_ in ('mp4', 'webm'):
+ ext = format_
+ elif format_ == 'rtmp':
+ ext = 'flv'
+ elif format_ == 'apple':
+ ext = 'mp4'
+ # Some streams have mp3 audio which does not play
+ # well with ffmpeg filter aac_adtstoasc
+ preference = -1
+ elif format_ == 'adobe': # f4m manifest fails with 404 in 80% of requests
+ continue
+ else: # Other formats not supported yet
+ continue
+
+ formats.append({
+ 'url': video_url,
+ 'ext': ext,
+ 'format_id': format_id,
+ 'quality': quality(fmt.get('quality')),
+ 'preference': preference,
+ })
+ self._sort_formats(formats)
+
+ title = item['title']
+ is_live = item['type'] == 'stream'
+ if is_live:
+ title = self._live_title(title)
+ description = self._og_search_description(webpage, default=None) or self._html_search_meta(
+ 'description', webpage, 'description', default=None)
+ timestamp = None
+ duration = None
+ if not is_live:
+ duration = int_or_none(item.get('length'))
+ timestamp = item.get('published')
+ if timestamp:
+ timestamp = parse_iso8601(timestamp[:-5])
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': item.get('image'),
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'is_live': is_live,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/playvid.py b/hypervideo_dl/extractor/playvid.py
new file mode 100644
index 0000000..4aef186
--- /dev/null
+++ b/hypervideo_dl/extractor/playvid.py
@@ -0,0 +1,100 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_unquote_plus,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+)
+
+
+class PlayvidIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
+ _TESTS = [{
+ 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
+ 'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
+ 'info_dict': {
+ 'id': 'RnmBNgtrrJu',
+ 'ext': 'mp4',
+ 'title': 'md5:9256d01c6317e3f703848b5906880dc8',
+ 'duration': 82,
+ 'age_limit': 18,
+ },
+ 'skip': 'Video removed due to ToS',
+ }, {
+ 'url': 'http://www.playvid.com/watch/hwb0GpNkzgH',
+ 'md5': '39d49df503ad7b8f23a4432cbf046477',
+ 'info_dict': {
+ 'id': 'hwb0GpNkzgH',
+ 'ext': 'mp4',
+ 'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park',
+ 'age_limit': 18,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ m_error = re.search(
+ r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage)
+ if m_error:
+ raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
+
+ video_title = None
+ duration = None
+ video_thumbnail = None
+ formats = []
+
+ # most of the information is stored in the flashvars
+ flashvars = self._html_search_regex(
+ r'flashvars="(.+?)"', webpage, 'flashvars')
+
+ infos = compat_urllib_parse_unquote(flashvars).split('&')
+ for info in infos:
+ videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info)
+ if videovars_match:
+ key = videovars_match.group(1)
+ val = videovars_match.group(2)
+
+ if key == 'title':
+ video_title = compat_urllib_parse_unquote_plus(val)
+ if key == 'duration':
+ try:
+ duration = int(val)
+ except ValueError:
+ pass
+ if key == 'big_thumb':
+ video_thumbnail = val
+
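+ # URL keys look like 'video_urls][500p' once the outer 'video_vars[...]' wrapper is stripped, hence the regex below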
+ videourl_match = re.match(
+ r'^video_urls\]\[(?P<resolution>[0-9]+)p', key)
+ if videourl_match:
+ height = int(videourl_match.group('resolution'))
+ formats.append({
+ 'height': height,
+ 'url': val,
+ })
+ self._sort_formats(formats)
+
+ # Extract title - should be in the flashvars; if not, look elsewhere
+ if video_title is None:
+ video_title = self._html_search_regex(
+ r'<title>(.*?)</title', webpage, 'title')
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': video_title,
+ 'thumbnail': video_thumbnail,
+ 'duration': duration,
+ 'description': None,
+ 'age_limit': 18
+ }
diff --git a/hypervideo_dl/extractor/playwire.py b/hypervideo_dl/extractor/playwire.py
new file mode 100644
index 0000000..4d96a10
--- /dev/null
+++ b/hypervideo_dl/extractor/playwire.py
@@ -0,0 +1,77 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ dict_get,
+ float_or_none,
+)
+
+
+class PlaywireIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json',
+ 'md5': 'e6398701e3595888125729eaa2329ed9',
+ 'info_dict': {
+ 'id': '3353705',
+ 'ext': 'mp4',
+ 'title': 'S04_RM_UCL_Rus',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'duration': 145.94,
+ },
+ }, {
+ # m3u8 in f4m
+ 'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json',
+ 'info_dict': {
+ 'id': '4840492',
+ 'ext': 'mp4',
+ 'title': 'ITV EL SHOW FULL',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # Multiple resolutions while bitrates missing
+ 'url': 'http://cdn.playwire.com/11625/embed/85228.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id')
+
+ player = self._download_json(
+ 'http://config.playwire.com/%s/videos/v2/%s/zeus.json' % (publisher_id, video_id),
+ video_id)
+
+ title = player['settings']['title']
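+ # duration appears to be reported in milliseconds; scale=1000 converts it to seconds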
+ duration = float_or_none(player.get('duration'), 1000)
+
+ content = player['content']
+ thumbnail = content.get('poster')
+ src = content['media']['f4m']
+
+ formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls')
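+ # Some manifests list multiple resolutions without bitrates; rank '-hd.' variants above the rest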
+ for a_format in formats:
+ if not dict_get(a_format, ['tbr', 'width', 'height']):
+ a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/pluralsight.py b/hypervideo_dl/extractor/pluralsight.py
new file mode 100644
index 0000000..2d63855
--- /dev/null
+++ b/hypervideo_dl/extractor/pluralsight.py
@@ -0,0 +1,502 @@
+from __future__ import unicode_literals
+
+import collections
+import json
+import os
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_duration,
+ qualities,
+ srt_subtitles_timecode,
+ try_get,
+ update_url_query,
+ urlencode_postdata,
+)
+
+
+class PluralsightBaseIE(InfoExtractor):
+ _API_BASE = 'https://app.pluralsight.com'
+
+ _GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE
+ _GRAPHQL_HEADERS = {
+ 'Content-Type': 'application/json;charset=UTF-8',
+ }
+ _GRAPHQL_COURSE_TMPL = '''
+query BootstrapPlayer {
+ rpc {
+ bootstrapPlayer {
+ profile {
+ firstName
+ lastName
+ email
+ username
+ userHandle
+ authed
+ isAuthed
+ plan
+ }
+ course(courseId: "%s") {
+ name
+ title
+ courseHasCaptions
+ translationLanguages {
+ code
+ name
+ }
+ supportsWideScreenVideoFormats
+ timestamp
+ modules {
+ name
+ title
+ duration
+ formattedDuration
+ author
+ authorized
+ clips {
+ authorized
+ clipId
+ duration
+ formattedDuration
+ id
+ index
+ moduleIndex
+ moduleTitle
+ name
+ title
+ watched
+ }
+ }
+ }
+ }
+ }
+}'''
+
+ def _download_course(self, course_id, url, display_id):
+ try:
+ return self._download_course_rpc(course_id, url, display_id)
+ except ExtractorError:
+ # Old API fallback
+ return self._download_json(
+ 'https://app.pluralsight.com/player/user/api/v1/player/payload',
+ display_id, data=urlencode_postdata({'courseId': course_id}),
+ headers={'Referer': url})
+
+ def _download_course_rpc(self, course_id, url, display_id):
+ response = self._download_json(
+ self._GRAPHQL_EP, display_id, data=json.dumps({
+ 'query': self._GRAPHQL_COURSE_TMPL % course_id,
+ 'variables': {}
+ }).encode('utf-8'), headers=self._GRAPHQL_HEADERS)
+
+ course = try_get(
+ response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'],
+ dict)
+ if course:
+ return course
+
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, response['error']['message']),
+ expected=True)
+
+
+class PluralsightIE(PluralsightBaseIE):
+ IE_NAME = 'pluralsight'
+ _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:training/)?player\?'
+ _LOGIN_URL = 'https://app.pluralsight.com/id/'
+
+ _NETRC_MACHINE = 'pluralsight'
+
+ _TESTS = [{
+ 'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
+ 'md5': '4d458cf5cf4c593788672419a8dd4cf8',
+ 'info_dict': {
+ 'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04',
+ 'ext': 'mp4',
+ 'title': 'Demo Monitoring',
+ 'duration': 338,
+ },
+ 'skip': 'Requires pluralsight account credentials',
+ }, {
+ 'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live',
+ 'only_matching': True,
+ }, {
+ # available without pluralsight account
+ 'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://app.pluralsight.com/player?course=ccna-intro-networking&author=ross-bagurdes&name=ccna-intro-networking-m06&clip=0',
+ 'only_matching': True,
+ }]
+
+ GRAPHQL_VIEWCLIP_TMPL = '''
+query viewClip {
+ viewClip(input: {
+ author: "%(author)s",
+ clipIndex: %(clipIndex)d,
+ courseName: "%(courseName)s",
+ includeCaptions: %(includeCaptions)s,
+ locale: "%(locale)s",
+ mediaType: "%(mediaType)s",
+ moduleName: "%(moduleName)s",
+ quality: "%(quality)s"
+ }) {
+ urls {
+ url
+ cdn
+ rank
+ source
+ },
+ status
+ }
+}'''
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'Username': username,
+ 'Password': password,
+ })
+
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+ 'post url', default=self._LOGIN_URL, group='url')
+
+ if not post_url.startswith('http'):
+ post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
+ response = self._download_webpage(
+ post_url, None, 'Logging in',
+ data=urlencode_postdata(login_form),
+ headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+ error = self._search_regex(
+ r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+
+ if all(not re.search(p, response) for p in (
+ r'__INITIAL_STATE__', r'["\']currentUser["\']',
+ # new layout?
+ r'>\s*Sign out\s*<')):
+ BLOCKED = 'Your account has been blocked due to suspicious activity'
+ if BLOCKED in response:
+ raise ExtractorError(
+ 'Unable to login: %s' % BLOCKED, expected=True)
+ MUST_AGREE = 'To continue using Pluralsight, you must agree to'
+ if any(p in response for p in (MUST_AGREE, '>Disagree<', '>Agree<')):
+ raise ExtractorError(
+ 'Unable to login: %s some documents. Go to pluralsight.com, '
+ 'log in and agree with what Pluralsight requires.'
+ % MUST_AGREE, expected=True)
+
+ raise ExtractorError('Unable to log in')
+
+ def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_id):
+ captions = None
+ if clip_id:
+ captions = self._download_json(
+ '%s/transcript/api/v1/caption/json/%s/%s'
+ % (self._API_BASE, clip_id, lang), video_id,
+ 'Downloading captions JSON', 'Unable to download captions JSON',
+ fatal=False)
+ if not captions:
+ captions_post = {
+ 'a': author,
+ 'cn': int(clip_idx),
+ 'lc': lang,
+ 'm': name,
+ }
+ captions = self._download_json(
+ '%s/player/retrieve-captions' % self._API_BASE, video_id,
+ 'Downloading captions JSON', 'Unable to download captions JSON',
+ fatal=False, data=json.dumps(captions_post).encode('utf-8'),
+ headers={'Content-Type': 'application/json;charset=utf-8'})
+ if captions:
+ return {
+ lang: [{
+ 'ext': 'json',
+ 'data': json.dumps(captions),
+ }, {
+ 'ext': 'srt',
+ 'data': self._convert_subtitles(duration, captions),
+ }]
+ }
+
+ @staticmethod
+ def _convert_subtitles(duration, subs):
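+ # Convert Pluralsight's JSON captions to SRT; the last cue is closed at the clip duration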
+ srt = ''
+ TIME_OFFSET_KEYS = ('displayTimeOffset', 'DisplayTimeOffset')
+ TEXT_KEYS = ('text', 'Text')
+ for num, current in enumerate(subs):
+ start, text = (
+ float_or_none(dict_get(current, TIME_OFFSET_KEYS, skip_false_values=False)),
+ dict_get(current, TEXT_KEYS))
+ if start is None or text is None:
+ continue
+ end = duration if num == len(subs) - 1 else float_or_none(
+ dict_get(subs[num + 1], TIME_OFFSET_KEYS, skip_false_values=False))
+ if end is None:
+ continue
+ srt += os.linesep.join(
+ (
+ '%d' % num,
+ '%s --> %s' % (
+ srt_subtitles_timecode(start),
+ srt_subtitles_timecode(end)),
+ text,
+ os.linesep,
+ ))
+ return srt
+
+ def _real_extract(self, url):
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+
+ author = qs.get('author', [None])[0]
+ name = qs.get('name', [None])[0]
+ clip_idx = qs.get('clip', [None])[0]
+ course_name = qs.get('course', [None])[0]
+
+ if any(not f for f in (author, name, clip_idx, course_name,)):
+ raise ExtractorError('Invalid URL', expected=True)
+
+ display_id = '%s-%s' % (name, clip_idx)
+
+ course = self._download_course(course_name, url, display_id)
+
+ collection = course['modules']
+
+ clip = None
+
+ for module_ in collection:
+ if name in (module_.get('moduleName'), module_.get('name')):
+ for clip_ in module_.get('clips', []):
+ clip_index = clip_.get('clipIndex')
+ if clip_index is None:
+ clip_index = clip_.get('index')
+ if clip_index is None:
+ continue
+ if compat_str(clip_index) == clip_idx:
+ clip = clip_
+ break
+
+ if not clip:
+ raise ExtractorError('Unable to resolve clip')
+
+ title = clip['title']
+ clip_id = clip.get('clipName') or clip.get('name') or clip['clipId']
+
+ QUALITIES = {
+ 'low': {'width': 640, 'height': 480},
+ 'medium': {'width': 848, 'height': 640},
+ 'high': {'width': 1024, 'height': 768},
+ 'high-widescreen': {'width': 1280, 'height': 720},
+ }
+
+ QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',)
+ quality_key = qualities(QUALITIES_PREFERENCE)
+
+ AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])
+
+ ALLOWED_QUALITIES = (
+ AllowedQuality('webm', ['high', ]),
+ AllowedQuality('mp4', ['low', 'medium', 'high', ]),
+ )
+
+ # Some courses also offer widescreen resolution for high quality (see
+ # https://github.com/ytdl-org/youtube-dl/issues/7766)
+ widescreen = course.get('supportsWideScreenVideoFormats') is True
+ best_quality = 'high-widescreen' if widescreen else 'high'
+ if widescreen:
+ for allowed_quality in ALLOWED_QUALITIES:
+ allowed_quality.qualities.append(best_quality)
+
+ # In order to minimize the number of calls to the ViewClip API and reduce
+ # the probability of being throttled or banned by Pluralsight, we request
+ # only a single format unless a formats listing was explicitly requested.
+ if self._downloader.params.get('listformats', False):
+ allowed_qualities = ALLOWED_QUALITIES
+ else:
+ def guess_allowed_qualities():
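+ # Requested formats look like 'mp4-high' or 'webm-high-widescreen', i.e. ext followed by quality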
+ req_format = self._downloader.params.get('format') or 'best'
+ req_format_split = req_format.split('-', 1)
+ if len(req_format_split) > 1:
+ req_ext, req_quality = req_format_split
+ req_quality = '-'.join(req_quality.split('-')[:2])
+ for allowed_quality in ALLOWED_QUALITIES:
+ if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
+ return (AllowedQuality(req_ext, (req_quality, )), )
+ req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4'
+ return (AllowedQuality(req_ext, (best_quality, )), )
+ allowed_qualities = guess_allowed_qualities()
+
+ formats = []
+ for ext, qualities_ in allowed_qualities:
+ for quality in qualities_:
+ f = QUALITIES[quality].copy()
+ clip_post = {
+ 'author': author,
+ 'includeCaptions': 'false',
+ 'clipIndex': int(clip_idx),
+ 'courseName': course_name,
+ 'locale': 'en',
+ 'moduleName': name,
+ 'mediaType': ext,
+ 'quality': '%dx%d' % (f['width'], f['height']),
+ }
+ format_id = '%s-%s' % (ext, quality)
+
+ try:
+ viewclip = self._download_json(
+ self._GRAPHQL_EP, display_id,
+ 'Downloading %s viewclip graphql' % format_id,
+ data=json.dumps({
+ 'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post,
+ 'variables': {}
+ }).encode('utf-8'),
+ headers=self._GRAPHQL_HEADERS)['data']['viewClip']
+ except ExtractorError:
+ # Still works but most likely will go soon
+ viewclip = self._download_json(
+ '%s/video/clips/viewclip' % self._API_BASE, display_id,
+ 'Downloading %s viewclip JSON' % format_id, fatal=False,
+ data=json.dumps(clip_post).encode('utf-8'),
+ headers={'Content-Type': 'application/json;charset=utf-8'})
+
+ # Pluralsight tracks multiple sequential calls to the ViewClip API and starts
+ # to return HTTP 429 errors after some time (see
+ # https://github.com/ytdl-org/youtube-dl/pull/6989). It may even lead
+ # to an account ban (see https://github.com/ytdl-org/youtube-dl/issues/6842).
+ # To somewhat reduce the probability of these consequences
+ # we sleep for a random amount of time before each call to ViewClip.
+ self._sleep(
+ random.randint(5, 10), display_id,
+ '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
+
+ if not viewclip:
+ continue
+
+ clip_urls = viewclip.get('urls')
+ if not isinstance(clip_urls, list):
+ continue
+
+ for clip_url_data in clip_urls:
+ clip_url = clip_url_data.get('url')
+ if not clip_url:
+ continue
+ cdn = clip_url_data.get('cdn')
+ clip_f = f.copy()
+ clip_f.update({
+ 'url': clip_url,
+ 'ext': ext,
+ 'format_id': '%s-%s' % (format_id, cdn) if cdn else format_id,
+ 'quality': quality_key(quality),
+ 'source_preference': int_or_none(clip_url_data.get('rank')),
+ })
+ formats.append(clip_f)
+
+ self._sort_formats(formats)
+
+ duration = int_or_none(
+ clip.get('duration')) or parse_duration(clip.get('formattedDuration'))
+
+ # TODO: other languages?
+ subtitles = self.extract_subtitles(
+ author, clip_idx, clip.get('clipId'), 'en', name, duration, display_id)
+
+ return {
+ 'id': clip_id,
+ 'title': title,
+ 'duration': duration,
+ 'creator': author,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class PluralsightCourseIE(PluralsightBaseIE):
+ IE_NAME = 'pluralsight:course'
+ _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)'
+ _TESTS = [{
+ # Free course from Pluralsight Starter Subscription for Microsoft TechNet
+ # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
+ 'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
+ 'info_dict': {
+ 'id': 'hosting-sql-server-windows-azure-iaas',
+ 'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals',
+ 'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
+ },
+ 'playlist_count': 31,
+ }, {
+ # available without pluralsight account
+ 'url': 'https://www.pluralsight.com/courses/angularjs-get-started',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ # TODO: PSM cookie
+
+ course = self._download_course(course_id, url, course_id)
+
+ title = course['title']
+ course_name = course['name']
+ course_data = course['modules']
+ description = course.get('description') or course.get('shortDescription')
+
+ entries = []
+ for num, module in enumerate(course_data, 1):
+ author = module.get('author')
+ module_name = module.get('name')
+ if not author or not module_name:
+ continue
+ for clip in module.get('clips', []):
+ clip_index = int_or_none(clip.get('index'))
+ if clip_index is None:
+ continue
+ clip_url = update_url_query(
+ '%s/player' % self._API_BASE, query={
+ 'mode': 'live',
+ 'course': course_name,
+ 'author': author,
+ 'name': module_name,
+ 'clip': clip_index,
+ })
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': clip_url,
+ 'ie_key': PluralsightIE.ie_key(),
+ 'chapter': module.get('title'),
+ 'chapter_number': num,
+ 'chapter_id': module.get('moduleRef'),
+ })
+
+ return self.playlist_result(entries, course_id, title, description)
diff --git a/hypervideo_dl/extractor/podomatic.py b/hypervideo_dl/extractor/podomatic.py
new file mode 100644
index 0000000..e782e3f
--- /dev/null
+++ b/hypervideo_dl/extractor/podomatic.py
@@ -0,0 +1,76 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class PodomaticIE(InfoExtractor):
+ IE_NAME = 'podomatic'
+ _VALID_URL = r'''(?x)
+ (?P<proto>https?)://
+ (?:
+ (?P<channel>[^.]+)\.podomatic\.com/entry|
+ (?:www\.)?podomatic\.com/podcasts/(?P<channel_2>[^/]+)/episodes
+ )/
+ (?P<id>[^/?#&]+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00',
+ 'md5': '84bb855fcf3429e6bf72460e1eed782d',
+ 'info_dict': {
+ 'id': '2009-01-02T16_03_35-08_00',
+ 'ext': 'mp3',
+ 'uploader': 'Science Teaching Tips',
+ 'uploader_id': 'scienceteachingtips',
+ 'title': '64. When the Moon Hits Your Eye',
+ 'duration': 446,
+ }
+ }, {
+ 'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00',
+ 'md5': 'd2cf443931b6148e27638650e2638297',
+ 'info_dict': {
+ 'id': '2013-11-15T16_31_21-08_00',
+ 'ext': 'mp3',
+ 'uploader': 'Ostbahnhof / Techno Mix',
+ 'uploader_id': 'ostbahnhof',
+ 'title': 'Einunddreizig',
+ 'duration': 3799,
+ }
+ }, {
+ 'url': 'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ channel = mobj.group('channel') or mobj.group('channel_2')
+
+ json_url = ('%s://%s.podomatic.com/entry/embed_params/%s?permalink=true&rtmp=0'
+ % (mobj.group('proto'), channel, video_id))
+ data_json = self._download_webpage(
+ json_url, video_id, 'Downloading video info')
+ data = json.loads(data_json)
+
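+ # Prefer the direct download link; otherwise derive an HTTP URL from the RTMP streamer address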
+ video_url = data['downloadLink']
+ if not video_url:
+ video_url = '%s/%s' % (data['streamer'].replace('rtmp', 'http'), data['mediaLocation'])
+ uploader = data['podcast']
+ title = data['title']
+ thumbnail = data['imageLocation']
+ duration = int_or_none(data.get('length'), 1000)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'uploader': uploader,
+ 'uploader_id': channel,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ }
diff --git a/hypervideo_dl/extractor/pokemon.py b/hypervideo_dl/extractor/pokemon.py
new file mode 100644
index 0000000..80222d4
--- /dev/null
+++ b/hypervideo_dl/extractor/pokemon.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+)
+
+
+class PokemonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
+ _TESTS = [{
+ 'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
+ 'md5': '2fe8eaec69768b25ef898cda9c43062e',
+ 'info_dict': {
+ 'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
+ 'ext': 'mp4',
+ 'title': 'The Ol’ Raise and Switch!',
+ 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
+ },
+ 'add_ie': ['LimelightMedia'],
+ }, {
+ # no data-video-title
+ 'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
+ 'info_dict': {
+ 'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
+ 'ext': 'mp4',
+ 'title': "Pokémon : L'ascension de Darkrai",
+ 'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
+ },
+ 'add_ie': ['LimelightMedia'],
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, display_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, video_id or display_id)
+ video_data = extract_attributes(self._search_regex(
+ r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
+ webpage, 'video data element'))
+ video_id = video_data['data-video-id']
+ title = video_data.get('data-video-title') or self._html_search_meta(
+ 'pkm-title', webpage, 'title', default=None) or self._search_regex(
+ r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')
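+ # Hand the actual media off to the Limelight extractor via url_transparent, keeping the metadata collected here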
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': 'limelight:media:%s' % video_id,
+ 'title': title,
+ 'description': video_data.get('data-video-summary'),
+ 'thumbnail': video_data.get('data-video-poster'),
+ 'series': 'Pokémon',
+ 'season_number': int_or_none(video_data.get('data-video-season')),
+ 'episode': title,
+ 'episode_number': int_or_none(video_data.get('data-video-episode')),
+ 'ie_key': 'LimelightMedia',
+ }
diff --git a/hypervideo_dl/extractor/polskieradio.py b/hypervideo_dl/extractor/polskieradio.py
new file mode 100644
index 0000000..978d6f8
--- /dev/null
+++ b/hypervideo_dl/extractor/polskieradio.py
@@ -0,0 +1,182 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+ compat_urlparse
+)
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ strip_or_none,
+ unified_timestamp,
+)
+
+
+class PolskieRadioIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie',
+ 'info_dict': {
+ 'id': '1587943',
+ 'title': 'Prof. Andrzej Nowak: o historii nie da się myśleć beznamiętnie',
+ 'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5',
+ },
+ 'playlist': [{
+ 'md5': '2984ee6ce9046d91fc233bc1a864a09a',
+ 'info_dict': {
+ 'id': '1540576',
+ 'ext': 'mp3',
+ 'title': 'md5:d4623290d4ac983bf924061c75c23a0d',
+ 'timestamp': 1456594200,
+ 'upload_date': '20160227',
+ 'duration': 2364,
+ 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$'
+ },
+ }],
+ }, {
+ 'url': 'http://www.polskieradio.pl/265/5217/Artykul/1635803,Euro-2016-nie-ma-miejsca-na-blad-Polacy-graja-ze-Szwajcaria-o-cwiercfinal',
+ 'info_dict': {
+ 'id': '1635803',
+ 'title': 'Euro 2016: nie ma miejsca na błąd. Polacy grają ze Szwajcarią o ćwierćfinał',
+ 'description': 'md5:01cb7d0cad58664095d72b51a1ebada2',
+ },
+ 'playlist_mincount': 12,
+ }, {
+ 'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943',
+ 'only_matching': True,
+ }, {
+ # with mp4 video
+ 'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ content = self._search_regex(
+ r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
+ webpage, 'content')
+
+ timestamp = unified_timestamp(self._html_search_regex(
+ r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
+ webpage, 'timestamp', fatal=False))
+
+ thumbnail_url = self._og_search_thumbnail(webpage)
+
+ entries = []
+
+ media_urls = set()
+
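+ # Each embedded player carries its config in a data-media JSON attribute; dedupe by media URL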
+ for data_media in re.findall(r'<[^>]+data-media=({[^>]+})', content):
+ media = self._parse_json(data_media, playlist_id, fatal=False)
+ if not media or not media.get('file') or not media.get('desc'):
+ continue
+ media_url = self._proto_relative_url(media['file'], 'http:')
+ if media_url in media_urls:
+ continue
+ media_urls.add(media_url)
+ entries.append({
+ 'id': compat_str(media['id']),
+ 'url': media_url,
+ 'title': compat_urllib_parse_unquote(media['desc']),
+ 'duration': int_or_none(media.get('length')),
+ 'vcodec': 'none' if media.get('provider') == 'audio' else None,
+ 'timestamp': timestamp,
+ 'thumbnail': thumbnail_url
+ })
+
+ title = self._og_search_title(webpage).strip()
+ description = strip_or_none(self._og_search_description(webpage))
+
+ return self.playlist_result(entries, playlist_id, title, description)
+
+
+class PolskieRadioCategoryIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA',
+ 'info_dict': {
+ 'id': '5102',
+ 'title': 'HISTORIA ŻYWA',
+ },
+ 'playlist_mincount': 38,
+ }, {
+ 'url': 'http://www.polskieradio.pl/7/4807',
+ 'info_dict': {
+ 'id': '4807',
+ 'title': 'Vademecum 1050. rocznicy Chrztu Polski'
+ },
+ 'playlist_mincount': 5
+ }, {
+ 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source',
+ 'only_matching': True
+ }, {
+ 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow',
+ 'info_dict': {
+ 'id': '4143',
+ 'title': 'Kierunek Kraków',
+ },
+ 'playlist_mincount': 61
+ }, {
+ 'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka',
+ 'info_dict': {
+ 'id': '214',
+ 'title': 'Muzyka',
+ },
+ 'playlist_mincount': 61
+ }, {
+ 'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PolskieRadioIE.suitable(url) else super(PolskieRadioCategoryIE, cls).suitable(url)
+
+ def _entries(self, url, page, category_id):
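+ # Follow the 'next' pagination links until none remain, yielding every linked article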
+ content = page
+ for page_num in itertools.count(2):
+ for a_entry, entry_id in re.findall(
+ r'(?s)<article[^>]+>.*?(<a[^>]+href=["\']/\d+/\d+/Artykul/(\d+)[^>]+>).*?</article>',
+ content):
+ entry = extract_attributes(a_entry)
+ href = entry.get('href')
+ if not href:
+ continue
+ yield self.url_result(
+ compat_urlparse.urljoin(url, href), PolskieRadioIE.ie_key(),
+ entry_id, entry.get('title'))
+ mobj = re.search(
+ r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ content)
+ if not mobj:
+ break
+ next_url = compat_urlparse.urljoin(url, mobj.group('url'))
+ content = self._download_webpage(
+ next_url, category_id, 'Downloading page %s' % page_num)
+
+ def _real_extract(self, url):
+ category_id = self._match_id(url)
+ webpage = self._download_webpage(url, category_id)
+ title = self._html_search_regex(
+ r'<title>([^<]+) - [^<]+ - [^<]+</title>',
+ webpage, 'title', fatal=False)
+ return self.playlist_result(
+ self._entries(url, webpage, category_id),
+ category_id, title)
diff --git a/hypervideo_dl/extractor/popcorntimes.py b/hypervideo_dl/extractor/popcorntimes.py
new file mode 100644
index 0000000..7bf7f98
--- /dev/null
+++ b/hypervideo_dl/extractor/popcorntimes.py
@@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_chr,
+)
+from ..utils import int_or_none
+
+
+class PopcorntimesIE(InfoExtractor):
+ _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
+ 'md5': '93f210991ad94ba8c3485950a2453257',
+ 'info_dict': {
+ 'id': 'A1XCFvz',
+ 'display_id': 'haensel-und-gretel-opera-fantasy',
+ 'ext': 'mp4',
+ 'title': 'Hänsel und Gretel',
+ 'description': 'md5:1b8146791726342e7b22ce8125cf6945',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'creator': 'John Paul',
+ 'release_date': '19541009',
+ 'duration': 4260,
+ 'tbr': 5380,
+ 'width': 720,
+ 'height': 540,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id, display_id = mobj.group('id', 'display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._search_regex(
+ r'<h1>([^<]+)', webpage, 'title',
+ default=None) or self._html_search_meta(
+ 'ya:ovs:original_name', webpage, 'title', fatal=True)
+
+ loc = self._search_regex(
+ r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
+ group='value')
+
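+ # PCTMLOC holds base64 obfuscated with ROT13; undo the rotation before decoding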
+ loc_b64 = ''
+ for c in loc:
+ c_ord = ord(c)
+ if ord('a') <= c_ord <= ord('z') or ord('A') <= c_ord <= ord('Z'):
+ upper = ord('Z') if c_ord <= ord('Z') else ord('z')
+ c_ord += 13
+ if upper < c_ord:
+ c_ord -= 26
+ loc_b64 += compat_chr(c_ord)
+
+ video_url = compat_b64decode(loc_b64).decode('utf-8')
+
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
+ 'description', fatal=False)
+
+ thumbnail = self._search_regex(
+ r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'thumbnail', default=None,
+ group='value') or self._og_search_thumbnail(webpage)
+
+ creator = self._html_search_meta(
+ 'video:director', webpage, 'creator', default=None)
+
+ release_date = self._html_search_meta(
+ 'video:release_date', webpage, default=None)
+ if release_date:
+ release_date = release_date.replace('-', '')
+
+ def int_meta(name):
+ return int_or_none(self._html_search_meta(
+ name, webpage, default=None))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'creator': creator,
+ 'release_date': release_date,
+ 'duration': int_meta('video:duration'),
+ 'tbr': int_meta('ya:ovs:bitrate'),
+ 'width': int_meta('og:video:width'),
+ 'height': int_meta('og:video:height'),
+ 'http_headers': {
+ 'Referer': url,
+ },
+ }
diff --git a/hypervideo_dl/extractor/popcorntv.py b/hypervideo_dl/extractor/popcorntv.py
new file mode 100644
index 0000000..9f834fb
--- /dev/null
+++ b/hypervideo_dl/extractor/popcorntv.py
@@ -0,0 +1,77 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ unified_timestamp,
+)
+
+
+class PopcornTVIE(InfoExtractor):
+ _VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P<display_id>[^/]+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183',
+ 'md5': '47d65a48d147caf692ab8562fe630b45',
+ 'info_dict': {
+ 'id': '9183',
+ 'display_id': 'food-wars-battaglie-culinarie-episodio-01',
+ 'ext': 'mp4',
+ 'title': 'Food Wars, Battaglie Culinarie | Episodio 01',
+ 'description': 'md5:b8bea378faae4651d3b34c6e112463d0',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1497610857,
+ 'upload_date': '20170616',
+ 'duration': 1440,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id, video_id = mobj.group('display_id', 'id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ m3u8_url = extract_attributes(
+ self._search_regex(
+ r'(<link[^>]+itemprop=["\'](?:content|embed)Url[^>]*>)',
+ webpage, 'content'
+ ))['href']
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+
+ title = self._search_regex(
+ r'<h1[^>]+itemprop=["\']name[^>]*>([^<]+)', webpage,
+ 'title', default=None) or self._og_search_title(webpage)
+
+ description = self._html_search_regex(
+ r'(?s)<article[^>]+itemprop=["\']description[^>]*>(.+?)</article>',
+ webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage)
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'uploadDate', webpage, 'timestamp'))
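+ # The duration meta value is apparently given in minutes; invscale=60 converts it to seconds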
+ duration = int_or_none(self._html_search_meta(
+ 'duration', webpage), invscale=60)
+ view_count = int_or_none(self._html_search_meta(
+ 'interactionCount', webpage, 'view count'))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/porn91.py b/hypervideo_dl/extractor/porn91.py
new file mode 100644
index 0000000..20eac64
--- /dev/null
+++ b/hypervideo_dl/extractor/porn91.py
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ int_or_none,
+ ExtractorError,
+)
+
+
+class Porn91IE(InfoExtractor):
+ IE_NAME = '91porn'
+ _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/.+?\?viewkey=(?P<id>[\w\d]+)'
+
+ _TEST = {
+ 'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
+ 'md5': '7fcdb5349354f40d41689bd0fa8db05a',
+ 'info_dict': {
+ 'id': '7e42283b4f5ab36da134',
+ 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
+ 'ext': 'mp4',
+ 'duration': 431,
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
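+ # Force the Chinese interface so that the 时长/留言 labels matched below are present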
+ self._set_cookie('91porn.com', 'language', 'cn_CN')
+
+ webpage = self._download_webpage(
+ 'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id)
+
+ if '作为游客,你每天只可观看10个视频' in webpage:
+ raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True)
+
+ title = self._search_regex(
+ r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title')
+ title = title.replace('\n', '')
+
+ video_link_url = self._search_regex(
+ r'<textarea[^>]+id=["\']fm-video_link[^>]+>([^<]+)</textarea>',
+ webpage, 'video link')
+ videopage = self._download_webpage(video_link_url, video_id)
+
+ info_dict = self._parse_html5_media_entries(url, videopage, video_id)[0]
+
+ duration = parse_duration(self._search_regex(
+ r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False))
+
+ comment_count = int_or_none(self._search_regex(
+ r'留言:\s*</span>\s*(\d+)', webpage, 'comment count', fatal=False))
+
+ info_dict.update({
+ 'id': video_id,
+ 'title': title,
+ 'duration': duration,
+ 'comment_count': comment_count,
+ 'age_limit': self._rta_search(webpage),
+ })
+
+ return info_dict
diff --git a/hypervideo_dl/extractor/porncom.py b/hypervideo_dl/extractor/porncom.py
new file mode 100644
index 0000000..5726cab
--- /dev/null
+++ b/hypervideo_dl/extractor/porncom.py
@@ -0,0 +1,104 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ parse_filesize,
+ str_to_int,
+)
+
+
+class PornComIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[a-zA-Z]+\.)?porn\.com/videos/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.porn.com/videos/teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec-2603339',
+ 'md5': '3f30ce76267533cd12ba999263156de7',
+ 'info_dict': {
+ 'id': '2603339',
+ 'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec',
+ 'ext': 'mp4',
+ 'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 551,
+ 'view_count': int,
+ 'age_limit': 18,
+ 'categories': list,
+ 'tags': list,
+ },
+ }, {
+ 'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage = self._download_webpage(url, display_id)
+
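+ # Newer pages embed a JS player config (the obfuscated script writes 'var' as 'v1ar'); otherwise fall back to scraping the download links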
+ config = self._parse_json(
+ self._search_regex(
+ (r'=\s*({.+?})\s*;\s*v1ar\b',
+ r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*='),
+ webpage, 'config', default='{}'),
+ display_id, transform_source=js_to_json, fatal=False)
+
+ if config:
+ title = config['title']
+ formats = [{
+ 'url': stream['url'],
+ 'format_id': stream.get('id'),
+ 'height': int_or_none(self._search_regex(
+ r'^(\d+)[pP]', stream.get('id') or '', 'height', default=None))
+ } for stream in config['streams'] if stream.get('url')]
+ thumbnail = (compat_urlparse.urljoin(
+ config['thumbCDN'], config['poster'])
+ if config.get('thumbCDN') and config.get('poster') else None)
+ duration = int_or_none(config.get('length'))
+ else:
+ title = self._search_regex(
+ (r'<title>([^<]+)</title>', r'<h1[^>]*>([^<]+)</h1>'),
+ webpage, 'title')
+ formats = [{
+ 'url': compat_urlparse.urljoin(url, format_url),
+ 'format_id': '%sp' % height,
+ 'height': int(height),
+ 'filesize_approx': parse_filesize(filesize),
+ } for format_url, height, filesize in re.findall(
+ r'<a[^>]+href="(/download/[^"]+)">[^<]*?(\d+)p<span[^>]*>(\d+\s*[a-zA-Z]+)<',
+ webpage)]
+ thumbnail = None
+ duration = None
+
+ self._sort_formats(formats)
+
+ view_count = str_to_int(self._search_regex(
+ (r'Views:\s*</span>\s*<span>\s*([\d,.]+)',
+ r'class=["\']views["\'][^>]*><p>([\d,.]+)'), webpage,
+ 'view count', fatal=False))
+
+ def extract_list(kind):
+ s = self._search_regex(
+ (r'(?s)%s:\s*</span>\s*<span>(.+?)</span>' % kind.capitalize(),
+ r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize()),
+ webpage, kind, fatal=False)
+ return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ 'age_limit': 18,
+ 'categories': extract_list('categories'),
+ 'tags': extract_list('tags'),
+ }
diff --git a/hypervideo_dl/extractor/pornhd.py b/hypervideo_dl/extractor/pornhd.py
new file mode 100644
index 0000000..c6052ac
--- /dev/null
+++ b/hypervideo_dl/extractor/pornhd.py
@@ -0,0 +1,122 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ merge_dicts,
+ urljoin,
+)
+
+
+class PornHdIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
+ _TESTS = [{
+ 'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
+ 'md5': '87f1540746c1d32ec7a2305c12b96b25',
+ 'info_dict': {
+ 'id': '9864',
+ 'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
+ 'ext': 'mp4',
+ 'title': 'Restroom selfie masturbation',
+ 'description': 'md5:3748420395e03e31ac96857a8f125b2b',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'view_count': int,
+ 'like_count': int,
+ 'age_limit': 18,
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ }, {
+ 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
+ 'md5': '1b7b3a40b9d65a8e5b25f7ab9ee6d6de',
+ 'info_dict': {
+ 'id': '1962',
+ 'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
+ 'ext': 'mp4',
+ 'title': 'md5:98c6f8b2d9c229d0f0fde47f61a1a759',
+ 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'view_count': int,
+ 'like_count': int,
+ 'age_limit': 18,
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id or video_id)
+
+ title = self._html_search_regex(
+ [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
+ r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
+
+ sources = self._parse_json(js_to_json(self._search_regex(
+ r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
+ webpage, 'sources', default='{}')), video_id)
+
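+ # If no JS sources map is present, fall back to HTML5 <video> entries embedded in the page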
+ info = {}
+ if not sources:
+ entries = self._parse_html5_media_entries(url, webpage, video_id)
+ if entries:
+ info = entries[0]
+
+ if not sources and not info:
+ message = self._html_search_regex(
+ r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
+ webpage, 'error message', group='value')
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+
+ formats = []
+ for format_id, video_url in sources.items():
+ video_url = urljoin(url, video_url)
+ if not video_url:
+ continue
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]', format_id, 'height', default=None))
+ formats.append({
+ 'url': video_url,
+ 'ext': determine_ext(video_url, 'mp4'),
+ 'format_id': format_id,
+ 'height': height,
+ })
+ if formats:
+ info['formats'] = formats
+ self._sort_formats(info.get('formats') or [])
+
+ description = self._html_search_regex(
+ (r'(?s)<section[^>]+class=["\']video-description[^>]+>(?P<value>.+?)</section>',
+ r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1'),
+ webpage, 'description', fatal=False,
+ group='value') or self._html_search_meta(
+ 'description', webpage, default=None) or self._og_search_description(webpage)
+ view_count = int_or_none(self._html_search_regex(
+ r'(\d+) views\s*<', webpage, 'view count', fatal=False))
+ thumbnail = self._search_regex(
+ r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
+ 'thumbnail', default=None, group='url')
+
+ like_count = int_or_none(self._search_regex(
+ (r'(\d+)</span>\s*likes',
+ r'(\d+)\s*</11[^>]+>(?:&nbsp;|\s)*\blikes',
+ r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
+ webpage, 'like count', fatal=False))
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'formats': formats,
+ 'age_limit': 18,
+ })
diff --git a/hypervideo_dl/extractor/pornhub.py b/hypervideo_dl/extractor/pornhub.py
new file mode 100644
index 0000000..0314546
--- /dev/null
+++ b/hypervideo_dl/extractor/pornhub.py
@@ -0,0 +1,746 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import functools
+import itertools
+import operator
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+ compat_urllib_request,
+)
+from .openload import PhantomJSwrapper
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+ NO_DEFAULT,
+ orderedSet,
+ remove_quotes,
+ str_to_int,
+ update_url_query,
+ urlencode_postdata,
+ url_or_none,
+)
+
+
+class PornHubBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'pornhub'
+
+ def _download_webpage_handle(self, *args, **kwargs):
+ def dl(*args, **kwargs):
+ return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
+
+ ret = dl(*args, **kwargs)
+
+ if not ret:
+ return ret
+
+ webpage, urlh = ret
+
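+ # These markers indicate an anti-bot JavaScript challenge; solve it once with PhantomJS and retry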
+ if any(re.search(p, webpage) for p in (
+ r'<body\b[^>]+\bonload=["\']go\(\)',
+ r'document\.cookie\s*=\s*["\']RNKEY=',
+ r'document\.location\.reload\(true\)')):
+ url_or_request = args[0]
+ url = (url_or_request.get_full_url()
+ if isinstance(url_or_request, compat_urllib_request.Request)
+ else url_or_request)
+ phantom = PhantomJSwrapper(self, required_version='2.0')
+ phantom.get(url, html=webpage)
+ webpage, urlh = dl(*args, **kwargs)
+
+ return webpage, urlh
+
+ def _real_initialize(self):
+ self._logged_in = False
+
+ def _login(self, host):
+ if self._logged_in:
+ return
+
+ site = host.split('.')[0]
+
+ # The pornhub and pornhubpremium sites use separate accounts,
+ # so there should be an option to provide credentials for both.
+ # At the same time some videos are available under the same video id
+ # on both sites, so we have to identify them as the same video.
+ # For that purpose both sites are kept in the same extractor,
+ # but under different netrc machines.
+ username, password = self._get_login_info(netrc_machine=site)
+ if username is None:
+ return
+
+ login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
+ login_page = self._download_webpage(
+ login_url, None, 'Downloading %s login page' % site)
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'class=["\']signOut',
+ r'>Sign\s+[Oo]ut\s*<'))
+
+ if is_logged(login_page):
+ self._logged_in = True
+ return
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'username': username,
+ 'password': password,
+ })
+
+ response = self._download_json(
+ 'https://www.%s/front/authenticate' % host, None,
+ 'Logging in to %s' % site,
+ data=urlencode_postdata(login_form),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'Referer': login_url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ })
+
+ if response.get('success') == '1':
+ self._logged_in = True
+ return
+
+ message = response.get('message')
+ if message is not None:
+ raise ExtractorError(
+ 'Unable to login: %s' % message, expected=True)
+
+ raise ExtractorError('Unable to log in')
+
+
+class PornHubIE(PornHubBaseIE):
+ IE_DESC = 'PornHub and Thumbzilla'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+ (?:www\.)?thumbzilla\.com/video/
+ )
+ (?P<id>[\da-z]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
+ 'md5': 'a6391306d050e4547f62b3f485dd9ba9',
+ 'info_dict': {
+ 'id': '648719015',
+ 'ext': 'mp4',
+ 'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
+ 'uploader': 'Babes',
+ 'upload_date': '20130628',
+ 'timestamp': 1372447216,
+ 'duration': 361,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ 'tags': list,
+ 'categories': list,
+ },
+ }, {
+ # non-ASCII title
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
+ 'info_dict': {
+ 'id': '1331683002',
+ 'ext': 'mp4',
+ 'title': '重庆婷婷女王足交',
+ 'upload_date': '20150213',
+ 'timestamp': 1423804862,
+ 'duration': 1753,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ 'tags': list,
+ 'categories': list,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
+ }, {
+ # subtitles
+ 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
+ 'info_dict': {
+ 'id': 'ph5af5fef7c2aa7',
+ 'ext': 'mp4',
+ 'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
+ 'uploader': 'BFFs',
+ 'duration': 622,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ 'tags': list,
+ 'categories': list,
+            'subtitles': {
+                'en': [{
+                    'ext': 'srt'
+                }]
+            },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video has been disabled',
+ }, {
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
+ 'only_matching': True,
+ }, {
+ # removed at the request of cam4.com
+ 'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
+ 'only_matching': True,
+ }, {
+ # removed at the request of the copyright owner
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
+ 'only_matching': True,
+ }, {
+ # removed by uploader
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
+ 'only_matching': True,
+ }, {
+ # private video
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
+ 'only_matching': True,
+ }, {
+ # Some videos are available with the same id on both premium
+ # and non-premium sites (e.g. this and the following test)
+ 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
+ webpage)
+
+ def _extract_count(self, pattern, webpage, name):
+ return str_to_int(self._search_regex(
+ pattern, webpage, '%s count' % name, fatal=False))
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host') or 'pornhub.com'
+ video_id = mobj.group('id')
+
+ self._login(host)
+
+ self._set_cookie(host, 'age_verified', '1')
+
+ def dl_webpage(platform):
+ self._set_cookie(host, 'platform', platform)
+ return self._download_webpage(
+ 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
+ video_id, 'Downloading %s webpage' % platform)
+
+ webpage = dl_webpage('pc')
+
+ error_msg = self._html_search_regex(
+ (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+ r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
+ webpage, 'error message', default=None, group='error')
+ if error_msg:
+ error_msg = re.sub(r'\s+', ' ', error_msg)
+ raise ExtractorError(
+ 'PornHub said: %s' % error_msg,
+ expected=True, video_id=video_id)
+
+        # video_title from flashvars contains whitespace instead of non-ASCII
+        # characters (see http://www.pornhub.com/view_video.php?viewkey=1331683002),
+        # so it is not relied upon anymore.
+ title = self._html_search_meta(
+ 'twitter:title', webpage, default=None) or self._html_search_regex(
+ (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
+ r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
+ webpage, 'title', group='title')
+
+ video_urls = []
+ video_urls_set = set()
+ subtitles = {}
+
+ flashvars = self._parse_json(
+ self._search_regex(
+ r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
+ video_id)
+ if flashvars:
+ subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
+ if subtitle_url:
+ subtitles.setdefault('en', []).append({
+ 'url': subtitle_url,
+ 'ext': 'srt',
+ })
+ thumbnail = flashvars.get('image_url')
+ duration = int_or_none(flashvars.get('video_duration'))
+ media_definitions = flashvars.get('mediaDefinitions')
+ if isinstance(media_definitions, list):
+ for definition in media_definitions:
+ if not isinstance(definition, dict):
+ continue
+ video_url = definition.get('videoUrl')
+ if not video_url or not isinstance(video_url, compat_str):
+ continue
+ if video_url in video_urls_set:
+ continue
+ video_urls_set.add(video_url)
+ video_urls.append(
+ (video_url, int_or_none(definition.get('quality'))))
+ else:
+ thumbnail, duration = [None] * 2
+
+ def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
+ assignments = self._search_regex(
+ pattern, webpage, 'encoded url', default=default)
+ if not assignments:
+ return {}
+
+ assignments = assignments.split(';')
+
+ js_vars = {}
+
+ def parse_js_value(inp):
+ inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
+ if '+' in inp:
+ inps = inp.split('+')
+ return functools.reduce(
+ operator.concat, map(parse_js_value, inps))
+ inp = inp.strip()
+ if inp in js_vars:
+ return js_vars[inp]
+ return remove_quotes(inp)
+
+ for assn in assignments:
+ assn = assn.strip()
+ if not assn:
+ continue
+ assn = re.sub(r'var\s+', '', assn)
+ vname, value = assn.split('=', 1)
+ js_vars[vname] = parse_js_value(value)
+ return js_vars
+
+ def add_video_url(video_url):
+ v_url = url_or_none(video_url)
+ if not v_url:
+ return
+ if v_url in video_urls_set:
+ return
+ video_urls.append((v_url, None))
+ video_urls_set.add(v_url)
+
+ def parse_quality_items(quality_items):
+ q_items = self._parse_json(quality_items, video_id, fatal=False)
+ if not isinstance(q_items, list):
+ return
+ for item in q_items:
+ if isinstance(item, dict):
+ add_video_url(item.get('url'))
+
+ if not video_urls:
+ FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
+ js_vars = extract_js_vars(
+ webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
+ default=None)
+ if js_vars:
+ for key, format_url in js_vars.items():
+ if key.startswith(FORMAT_PREFIXES[-1]):
+ parse_quality_items(format_url)
+ elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
+ add_video_url(format_url)
+ if not video_urls and re.search(
+ r'<[^>]+\bid=["\']lockedPlayer', webpage):
+ raise ExtractorError(
+ 'Video %s is locked' % video_id, expected=True)
+
+ if not video_urls:
+ js_vars = extract_js_vars(
+ dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
+ add_video_url(js_vars['mediastring'])
+
+ for mobj in re.finditer(
+ r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage):
+ video_url = mobj.group('url')
+ if video_url not in video_urls_set:
+ video_urls.append((video_url, None))
+ video_urls_set.add(video_url)
+
+ upload_date = None
+ formats = []
+
+ def add_format(format_url, height=None):
+ ext = determine_ext(format_url)
+ if ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ return
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ return
+ tbr = None
+ mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
+ if mobj:
+ if not height:
+ height = int(mobj.group('height'))
+ tbr = int(mobj.group('tbr'))
+ formats.append({
+ 'url': format_url,
+ 'format_id': '%dp' % height if height else None,
+ 'height': height,
+ 'tbr': tbr,
+ })
+
+ for video_url, height in video_urls:
+ if not upload_date:
+ upload_date = self._search_regex(
+                    r'/(\d{6}/\d{2})/', video_url, 'upload date', default=None)
+ if upload_date:
+ upload_date = upload_date.replace('/', '')
+ if '/video/get_media' in video_url:
+ medias = self._download_json(video_url, video_id, fatal=False)
+ if isinstance(medias, list):
+ for media in medias:
+ if not isinstance(media, dict):
+ continue
+ video_url = url_or_none(media.get('videoUrl'))
+ if not video_url:
+ continue
+ height = int_or_none(media.get('quality'))
+ add_format(video_url, height)
+ continue
+ add_format(video_url)
+ self._sort_formats(formats)
+
+ video_uploader = self._html_search_regex(
+ r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
+ webpage, 'uploader', default=None)
+
+ def extract_vote_count(kind, name):
+ return self._extract_count(
+ (r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
+ r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
+ webpage, name)
+
+ view_count = self._extract_count(
+ r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
+ like_count = extract_vote_count('Up', 'like')
+ dislike_count = extract_vote_count('Down', 'dislike')
+ comment_count = self._extract_count(
+ r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
+
+ def extract_list(meta_key):
+ div = self._search_regex(
+ r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
+ % meta_key, webpage, meta_key, default=None)
+ if div:
+ return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
+
+ info = self._search_json_ld(webpage, video_id, default={})
+ # description provided in JSON-LD is irrelevant
+ info['description'] = None
+
+ return merge_dicts({
+ 'id': video_id,
+ 'uploader': video_uploader,
+ 'upload_date': upload_date,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ 'age_limit': 18,
+ 'tags': extract_list('tags'),
+ 'categories': extract_list('categories'),
+ 'subtitles': subtitles,
+ }, info)
+
+
+class PornHubPlaylistBaseIE(PornHubBaseIE):
+ def _extract_page(self, url):
+ return int_or_none(self._search_regex(
+ r'\bpage=(\d+)', url, 'page', default=None))
+
+ def _extract_entries(self, webpage, host):
+        # Only process the container div holding the main playlist content,
+        # skipping the drop-down menu that uses a similar markup pattern for
+        # videos (see https://github.com/ytdl-org/youtube-dl/issues/11594).
+ container = self._search_regex(
+ r'(?s)(<div[^>]+class=["\']container.+)', webpage,
+ 'container', default=webpage)
+
+ return [
+ self.url_result(
+ 'http://www.%s/%s' % (host, video_url),
+ PornHubIE.ie_key(), video_title=title)
+ for video_url, title in orderedSet(re.findall(
+ r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
+ container))
+ ]
+
+
+class PornHubUserIE(PornHubPlaylistBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
+ _TESTS = [{
+ 'url': 'https://www.pornhub.com/model/zoe_ph',
+ 'playlist_mincount': 118,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious',
+ 'info_dict': {
+ 'id': 'liz-vicious',
+ },
+ 'playlist_mincount': 118,
+ }, {
+ 'url': 'https://www.pornhub.com/users/russianveet69',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/channels/povd',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
+ 'only_matching': True,
+ }, {
+        # Unavailable via the /videos page, but available with direct pagination
+        # on the pornstar page (see [1]); requires premium
+        # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
+ 'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
+ 'only_matching': True,
+ }, {
+ # Same as before, multi page
+ 'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ user_id = mobj.group('id')
+ videos_url = '%s/videos' % mobj.group('url')
+ page = self._extract_page(url)
+ if page:
+ videos_url = update_url_query(videos_url, {'page': page})
+ return self.url_result(
+ videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
+
+
+class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
+ @staticmethod
+ def _has_more(webpage):
+ return re.search(
+ r'''(?x)
+ <li[^>]+\bclass=["\']page_next|
+ <link[^>]+\brel=["\']next|
+ <button[^>]+\bid=["\']moreDataBtn
+ ''', webpage) is not None
+
+ def _entries(self, url, host, item_id):
+ page = self._extract_page(url)
+
+ VIDEOS = '/videos'
+
+ def download_page(base_url, num, fallback=False):
+ note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
+ return self._download_webpage(
+ base_url, item_id, note, query={'page': num})
+
+ def is_404(e):
+ return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
+
+ base_url = url
+ has_page = page is not None
+ first_page = page if has_page else 1
+ for page_num in (first_page, ) if has_page else itertools.count(first_page):
+ try:
+ try:
+ webpage = download_page(base_url, page_num)
+ except ExtractorError as e:
+                    # Some sources may not be available via the /videos page,
+                    # so try falling back to main page pagination (see [1])
+                    # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
+ if is_404(e) and page_num == first_page and VIDEOS in base_url:
+ base_url = base_url.replace(VIDEOS, '')
+ webpage = download_page(base_url, page_num, fallback=True)
+ else:
+ raise
+ except ExtractorError as e:
+ if is_404(e) and page_num != first_page:
+ break
+ raise
+ page_entries = self._extract_entries(webpage, host)
+ if not page_entries:
+ break
+ for e in page_entries:
+ yield e
+ if not self._has_more(webpage):
+ break
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ item_id = mobj.group('id')
+
+ self._login(host)
+
+ return self.playlist_result(self._entries(url, host, item_id), item_id)
+
+
+class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.pornhub.com/model/zoe_ph/videos',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pornhub.com/users/rushandlia/videos',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
+ 'info_dict': {
+ 'id': 'pornstar/jenny-blighe/videos',
+ },
+ 'playlist_mincount': 149,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3',
+ 'info_dict': {
+ 'id': 'pornstar/jenny-blighe/videos',
+ },
+ 'playlist_mincount': 40,
+ }, {
+        # default sorting is Top Rated Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos',
+ 'info_dict': {
+ 'id': 'channels/povd/videos',
+ },
+ 'playlist_mincount': 293,
+ }, {
+ # Top Rated Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
+ 'only_matching': True,
+ }, {
+ # Most Recent Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
+ 'only_matching': True,
+ }, {
+ # Most Viewed Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
+ 'only_matching': True,
+ }, {
+ # Most Viewed Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv',
+ 'only_matching': True,
+ }, {
+ # Top Rated Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr',
+ 'only_matching': True,
+ }, {
+ # Longest Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg',
+ 'only_matching': True,
+ }, {
+ # Newest Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video?page=3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video/search?search=123',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/categories/teen',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/categories/teen?page=3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/hd',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/hd?page=3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/described-video',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/described-video?page=2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/playlist/44121572',
+ 'info_dict': {
+ 'id': 'playlist/44121572',
+ },
+ 'playlist_mincount': 132,
+ }, {
+ 'url': 'https://www.pornhub.com/playlist/4667351',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://de.pornhub.com/playlist/4667351',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url)
+ else super(PornHubPagedVideoListIE, cls).suitable(url))
+
+
+class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
+ _TESTS = [{
+ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
+ 'info_dict': {
+ 'id': 'jenny-blighe',
+ },
+ 'playlist_mincount': 129,
+ }, {
+ 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
+ 'only_matching': True,
+ }]
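
The extract_js_vars/parse_js_value pair above reassembles format URLs that the page obfuscates as chained JavaScript string assignments (var media_1="ht"+"tps://..."; and so on). A simplified standalone sketch of that decoding, assuming a made-up assignment string and skipping the extractor's comment stripping and NO_DEFAULT handling:

```python
import re


def decode_js_assignments(assignments):
    # Resolve a chain of 'var name = <literal or earlier var> + ...' statements
    js_vars = {}
    for assn in assignments.split(';'):
        assn = re.sub(r'var\s+', '', assn.strip())
        if not assn:
            continue
        name, _, value = assn.partition('=')
        parts = []
        for piece in value.split('+'):
            piece = piece.strip()
            # Each piece is either a previously assigned variable name
            # or a quoted string literal
            parts.append(js_vars.get(piece, piece.strip('"\'')))
        js_vars[name.strip()] = ''.join(parts)
    return js_vars


print(decode_js_assignments(
    'var media_0="https://ev";var media_1=media_0+".example/v";'
    'var quality_720p=media_1 + "ideo.mp4"'))
# {'media_0': 'https://ev', 'media_1': 'https://ev.example/v',
#  'quality_720p': 'https://ev.example/video.mp4'}
```
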
diff --git a/hypervideo_dl/extractor/pornotube.py b/hypervideo_dl/extractor/pornotube.py
new file mode 100644
index 0000000..1b5b9a3
--- /dev/null
+++ b/hypervideo_dl/extractor/pornotube.py
@@ -0,0 +1,85 @@
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class PornotubeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com/(?:[^?#]*?)/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.pornotube.com/orientation/straight/video/4964/title/weird-hot-and-wet-science',
+ 'md5': '60fc5a4f0d93a97968fc7999d98260c9',
+ 'info_dict': {
+ 'id': '4964',
+ 'ext': 'mp4',
+ 'upload_date': '20141203',
+ 'title': 'Weird Hot and Wet Science',
+ 'description': 'md5:a8304bef7ef06cb4ab476ca6029b01b0',
+ 'categories': ['Adult Humor', 'Blondes'],
+ 'uploader': 'Alpha Blue Archives',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1417582800,
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ token = self._download_json(
+ 'https://api.aebn.net/auth/v2/origins/authenticate',
+ video_id, note='Downloading token',
+ data=json.dumps({'credentials': 'Clip Application'}).encode('utf-8'),
+ headers={
+ 'Content-Type': 'application/json',
+ 'Origin': 'http://www.pornotube.com',
+ })['tokenKey']
+
+ video_url = self._download_json(
+ 'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id,
+ video_id, note='Downloading delivery information',
+ headers={'Authorization': token})['mediaUrl']
+
+ FIELDS = (
+ 'title', 'description', 'startSecond', 'endSecond', 'publishDate',
+ 'studios{name}', 'categories{name}', 'movieId', 'primaryImageNumber'
+ )
+
+ info = self._download_json(
+ 'https://api.aebn.net/content/v2/clips/%s?fields=%s'
+ % (video_id, ','.join(FIELDS)), video_id,
+ note='Downloading metadata',
+ headers={'Authorization': token})
+
+ if isinstance(info, list):
+ info = info[0]
+
+ title = info['title']
+
+ timestamp = int_or_none(info.get('publishDate'), scale=1000)
+ uploader = info.get('studios', [{}])[0].get('name')
+ movie_id = info.get('movieId')
+ primary_image_number = info.get('primaryImageNumber')
+ thumbnail = None
+ if movie_id and primary_image_number:
+ thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % (
+ movie_id, movie_id, primary_image_number)
+ start = int_or_none(info.get('startSecond'))
+ end = int_or_none(info.get('endSecond'))
+        duration = end - start if start is not None and end is not None else None
+ categories = [c['name'] for c in info.get('categories', []) if c.get('name')]
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': info.get('description'),
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'thumbnail': thumbnail,
+ 'categories': categories,
+ 'age_limit': 18,
+ }
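
The Pornotube extractor above chains three AEBN API calls: an anonymous authentication request that returns a tokenKey, a delivery lookup that trades the token for a media URL, and a metadata query over selected fields. A rough standard-library sketch of the first two steps, with the endpoints copied from the extractor and the clip id taken from its test case (whether the API still answers this way is not guaranteed):

```python
import json
import urllib.request


def fetch_json(url, data=None, headers=None):
    # POST when a body is supplied, GET otherwise
    req = urllib.request.Request(url, data=data, headers=headers or {})
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read().decode('utf-8'))


clip_id = '4964'
token = fetch_json(
    'https://api.aebn.net/auth/v2/origins/authenticate',
    data=json.dumps({'credentials': 'Clip Application'}).encode('utf-8'),
    headers={'Content-Type': 'application/json',
             'Origin': 'http://www.pornotube.com'})['tokenKey']
media_url = fetch_json(
    'https://api.aebn.net/delivery/v1/clips/%s/MP4' % clip_id,
    headers={'Authorization': token})['mediaUrl']
print(media_url)
```
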
diff --git a/hypervideo_dl/extractor/pornovoisines.py b/hypervideo_dl/extractor/pornovoisines.py
new file mode 100644
index 0000000..b6b7106
--- /dev/null
+++ b/hypervideo_dl/extractor/pornovoisines.py
@@ -0,0 +1,108 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ float_or_none,
+ unified_strdate,
+)
+
+
+class PornoVoisinesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P<id>\d+)/(?P<display_id>[^/.]+)'
+
+ _TEST = {
+ 'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html',
+ 'md5': '6f8aca6a058592ab49fe701c8ba8317b',
+ 'info_dict': {
+ 'id': '919',
+ 'display_id': 'recherche-appartement',
+ 'ext': 'mp4',
+ 'title': 'Recherche appartement',
+ 'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20140925',
+ 'duration': 120,
+ 'view_count': int,
+ 'average_rating': float,
+ 'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'],
+ 'age_limit': 18,
+ 'subtitles': {
+ 'fr': [{
+ 'ext': 'vtt',
+ }]
+ },
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ settings_url = self._download_json(
+ 'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id,
+ video_id, note='Getting settings URL')['video_settings_url']
+ settings = self._download_json(settings_url, video_id)['data']
+
+ formats = []
+ for kind, data in settings['variants'].items():
+ if kind == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls'))
+ elif kind == 'MP4':
+ for item in data:
+ formats.append({
+ 'url': item['url'],
+ 'height': item.get('height'),
+ 'bitrate': item.get('bitrate'),
+ })
+ self._sort_formats(formats)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+
+ # The webpage has a bug - there's no space between "thumb" and src=
+ thumbnail = self._html_search_regex(
+ r'<img[^>]+class=([\'"])thumb\1[^>]*src=([\'"])(?P<url>[^"]+)\2',
+ webpage, 'thumbnail', fatal=False, group='url')
+
+ upload_date = unified_strdate(self._search_regex(
+ r'Le\s*<b>([\d/]+)', webpage, 'upload date', fatal=False))
+ duration = settings.get('main', {}).get('duration')
+ view_count = int_or_none(self._search_regex(
+ r'(\d+) vues', webpage, 'view count', fatal=False))
+ average_rating = self._search_regex(
+ r'Note\s*:\s*(\d+(?:,\d+)?)', webpage, 'average rating', fatal=False)
+ if average_rating:
+ average_rating = float_or_none(average_rating.replace(',', '.'))
+
+ categories = self._html_search_regex(
+ r'(?s)Catégories\s*:\s*<b>(.+?)</b>', webpage, 'categories', fatal=False)
+ if categories:
+ categories = [category.strip() for category in categories.split(',')]
+
+ subtitles = {'fr': [{
+ 'url': subtitle,
+ } for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]}
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'average_rating': average_rating,
+ 'categories': categories,
+ 'age_limit': 18,
+ 'subtitles': subtitles,
+ }
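
The variants loop above dispatches on the kind key: 'HLS' carries a single manifest URL that expands into several formats, while 'MP4' is a list of progressive renditions mapped one-to-one. A small sketch of that dispatch against invented sample data (field names follow the extractor; the m3u8 expansion is reduced to a single entry here):

```python
def variants_to_formats(variants):
    formats = []
    for kind, data in variants.items():
        if kind == 'HLS':
            # The extractor calls _extract_m3u8_formats() here; keep one entry
            formats.append({'url': data, 'protocol': 'm3u8_native', 'ext': 'mp4'})
        elif kind == 'MP4':
            formats.extend({
                'url': item['url'],
                'height': item.get('height'),
                'bitrate': item.get('bitrate'),
            } for item in data)
    return formats


sample = {
    'HLS': 'https://example.com/v/919/playlist.m3u8',
    'MP4': [{'url': 'https://example.com/v/919_720.mp4', 'height': 720}],
}
print(variants_to_formats(sample))
```
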
diff --git a/hypervideo_dl/extractor/pornoxo.py b/hypervideo_dl/extractor/pornoxo.py
new file mode 100644
index 0000000..2831368
--- /dev/null
+++ b/hypervideo_dl/extractor/pornoxo.py
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ str_to_int,
+)
+
+
+class PornoXOIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
+ _TEST = {
+ 'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
+ 'md5': '582f28ecbaa9e6e24cb90f50f524ce87',
+ 'info_dict': {
+ 'id': '7564',
+ 'ext': 'flv',
+ 'title': 'Striptease From Sexy Secretary!',
+ 'display_id': 'striptease-from-sexy-secretary',
+ 'description': 'md5:0ee35252b685b3883f4a1d38332f9980',
+ 'categories': list, # NSFW
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id, display_id = mobj.groups()
+
+ webpage = self._download_webpage(url, video_id)
+ video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False)
+
+ title = self._html_search_regex(
+ r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title')
+
+ view_count = str_to_int(self._html_search_regex(
+ r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False))
+
+ categories_str = self._html_search_regex(
+ r'<meta name="description" content=".*featuring\s*([^"]+)"',
+ webpage, 'categories', fatal=False)
+ categories = (
+ None if categories_str is None
+ else categories_str.split(','))
+
+ video_data.update({
+ 'id': video_id,
+ 'title': title,
+ 'display_id': display_id,
+ 'description': self._html_search_meta('description', webpage),
+ 'categories': categories,
+ 'view_count': view_count,
+ 'age_limit': 18,
+ })
+
+ return video_data
diff --git a/hypervideo_dl/extractor/presstv.py b/hypervideo_dl/extractor/presstv.py
new file mode 100644
index 0000000..b5c2792
--- /dev/null
+++ b/hypervideo_dl/extractor/presstv.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import remove_start
+
+
+class PressTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P<y>\d+)/(?P<m>\d+)/(?P<d>\d+)/(?P<id>\d+)/(?P<display_id>[^/]+)?'
+
+ _TEST = {
+ 'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/',
+ 'md5': '5d7e3195a447cb13e9267e931d8dd5a5',
+ 'info_dict': {
+ 'id': '459911',
+ 'display_id': 'Australian-sewerage-treatment-facility-',
+ 'ext': 'mp4',
+ 'title': 'Organic mattresses used to clean waste water',
+ 'upload_date': '20160409',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:20002e654bbafb6908395a5c0cfcd125'
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage = self._download_webpage(url, display_id)
+
+ # extract video URL from webpage
+ video_url = self._hidden_inputs(webpage)['inpPlayback']
+
+ # build list of available formats
+ # specified in http://www.presstv.ir/Scripts/playback.js
+ base_url = 'http://192.99.219.222:82/presstv'
+ _formats = [
+ (180, '_low200.mp4'),
+ (360, '_low400.mp4'),
+ (720, '_low800.mp4'),
+ (1080, '.mp4')
+ ]
+
+ formats = [{
+ 'url': base_url + video_url[:-4] + extension,
+ 'format_id': '%dp' % height,
+ 'height': height,
+ } for height, extension in _formats]
+
+ # extract video metadata
+ title = remove_start(
+ self._html_search_meta('title', webpage, fatal=True), 'PressTV-')
+
+ thumbnail = self._og_search_thumbnail(webpage)
+ description = self._og_search_description(webpage)
+
+ upload_date = '%04d%02d%02d' % (
+ int(mobj.group('y')),
+ int(mobj.group('m')),
+ int(mobj.group('d')),
+ )
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'description': description
+ }
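
PressTV serves one full-quality file plus three lower-quality variants that differ only by a filename suffix, so the format list above is derived from the hidden-input URL by string surgery. A worked example of that derivation (base_url is the one hardcoded above; the inpPlayback path is hypothetical):

```python
base_url = 'http://192.99.219.222:82/presstv'
video_url = '/videos/2016/04/09/459911/clip.mp4'  # hypothetical inpPlayback value

_formats = [(180, '_low200.mp4'), (360, '_low400.mp4'),
            (720, '_low800.mp4'), (1080, '.mp4')]

# video_url[:-4] drops the trailing '.mp4' before each suffix is appended
formats = [{
    'url': base_url + video_url[:-4] + extension,
    'format_id': '%dp' % height,
    'height': height,
} for height, extension in _formats]

for f in formats:
    print(f['format_id'], f['url'])
# 180p  http://192.99.219.222:82/presstv/videos/2016/04/09/459911/clip_low200.mp4
# ...
# 1080p http://192.99.219.222:82/presstv/videos/2016/04/09/459911/clip.mp4
```
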
diff --git a/hypervideo_dl/extractor/prosiebensat1.py b/hypervideo_dl/extractor/prosiebensat1.py
new file mode 100644
index 0000000..e470882
--- /dev/null
+++ b/hypervideo_dl/extractor/prosiebensat1.py
@@ -0,0 +1,500 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from hashlib import sha1
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ merge_dicts,
+ unified_strdate,
+)
+
+
+class ProSiebenSat1BaseIE(InfoExtractor):
+ _GEO_BYPASS = False
+ _ACCESS_ID = None
+ _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
+ _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'
+
+ def _extract_video_info(self, url, clip_id):
+ client_location = url
+
+ video = self._download_json(
+ 'http://vas.sim-technik.de/vas/live/v2/videos',
+ clip_id, 'Downloading videos JSON', query={
+ 'access_token': self._TOKEN,
+ 'client_location': client_location,
+ 'client_name': self._CLIENT_NAME,
+ 'ids': clip_id,
+ })[0]
+
+ if video.get('is_protected') is True:
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
+ formats = []
+ if self._ACCESS_ID:
+ raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
+ protocols = self._download_json(
+ self._V4_BASE_URL + 'protocols', clip_id,
+ 'Downloading protocols JSON',
+ headers=self.geo_verification_headers(), query={
+ 'access_id': self._ACCESS_ID,
+ 'client_token': sha1((raw_ct).encode()).hexdigest(),
+ 'video_id': clip_id,
+ }, fatal=False, expected_status=(403,)) or {}
+ error = protocols.get('error') or {}
+ if error.get('title') == 'Geo check failed':
+ self.raise_geo_restricted(countries=['AT', 'CH', 'DE'])
+ server_token = protocols.get('server_token')
+ if server_token:
+ urls = (self._download_json(
+ self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
+ 'access_id': self._ACCESS_ID,
+ 'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
+ 'protocols': self._SUPPORTED_PROTOCOLS,
+ 'server_token': server_token,
+ 'video_id': clip_id,
+ }, fatal=False) or {}).get('urls') or {}
+ for protocol, variant in urls.items():
+ source_url = variant.get('clear', {}).get('url')
+ if not source_url:
+ continue
+ if protocol == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ source_url, clip_id, mpd_id=protocol, fatal=False))
+ elif protocol == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, clip_id, 'mp4', 'm3u8_native',
+ m3u8_id=protocol, fatal=False))
+ else:
+ formats.append({
+ 'url': source_url,
+ 'format_id': protocol,
+ })
+ if not formats:
+ source_ids = [compat_str(source['id']) for source in video['sources']]
+
+ client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+
+ sources = self._download_json(
+ 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
+ clip_id, 'Downloading sources JSON', query={
+ 'access_token': self._TOKEN,
+ 'client_id': client_id,
+ 'client_location': client_location,
+ 'client_name': self._CLIENT_NAME,
+ })
+ server_id = sources['server_id']
+
+ def fix_bitrate(bitrate):
+ bitrate = int_or_none(bitrate)
+ if not bitrate:
+ return None
+ return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
+
+ for source_id in source_ids:
+ client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+ urls = self._download_json(
+ 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
+ clip_id, 'Downloading urls JSON', fatal=False, query={
+ 'access_token': self._TOKEN,
+ 'client_id': client_id,
+ 'client_location': client_location,
+ 'client_name': self._CLIENT_NAME,
+ 'server_id': server_id,
+ 'source_ids': source_id,
+ })
+ if not urls:
+ continue
+ if urls.get('status_code') != 0:
+ raise ExtractorError('This video is unavailable', expected=True)
+ urls_sources = urls['sources']
+ if isinstance(urls_sources, dict):
+ urls_sources = urls_sources.values()
+ for source in urls_sources:
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ protocol = source.get('protocol')
+ mimetype = source.get('mimetype')
+ if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ source_url, clip_id, f4m_id='hds', fatal=False))
+ elif mimetype == 'application/x-mpegURL':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, clip_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif mimetype == 'application/dash+xml':
+ formats.extend(self._extract_mpd_formats(
+ source_url, clip_id, mpd_id='dash', fatal=False))
+ else:
+ tbr = fix_bitrate(source['bitrate'])
+ if protocol in ('rtmp', 'rtmpe'):
+ mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
+ if not mobj:
+ continue
+ path = mobj.group('path')
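+                            # rtmp source paths look like '<app>/mp4:<file>';
+                            # split at the last 'mp4:' marker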
+ mp4colon_index = path.rfind('mp4:')
+ app = path[:mp4colon_index]
+ play_path = path[mp4colon_index:]
+ formats.append({
+ 'url': '%s/%s' % (mobj.group('url'), app),
+ 'app': app,
+ 'play_path': play_path,
+ 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
+ 'page_url': 'http://www.prosieben.de',
+ 'tbr': tbr,
+ 'ext': 'flv',
+ 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
+ })
+ else:
+ formats.append({
+ 'url': source_url,
+ 'tbr': tbr,
+ 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'duration': float_or_none(video.get('duration')),
+ 'formats': formats,
+ }
+
+
+class ProSiebenSat1IE(ProSiebenSat1BaseIE):
+ IE_NAME = 'prosiebensat1'
+ IE_DESC = 'ProSiebenSat.1 Digital'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ (?:beta\.)?
+ (?:
+ prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia
+ )\.(?:de|at|ch)|
+ ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
+ )
+ /(?P<id>.+)
+ '''
+
+ _TESTS = [
+ {
+ # Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242
+ # in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215:
+ # - malformed f4m manifest support
+ # - proper handling of URLs starting with `https?://` in 2.0 manifests
+ # - recursive child f4m manifests extraction
+ 'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
+ 'info_dict': {
+ 'id': '2104602',
+ 'ext': 'mp4',
+ 'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2',
+ 'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
+ 'upload_date': '20131231',
+ 'duration': 5845.04,
+ 'series': 'CIRCUS HALLIGALLI',
+ 'season_number': 2,
+ 'episode': 'Episode 18 - Staffel 2',
+ 'episode_number': 18,
+ },
+ },
+ {
+ 'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
+ 'info_dict': {
+ 'id': '2570327',
+ 'ext': 'mp4',
+ 'title': 'Lady-Umstyling für Audrina',
+ 'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d',
+ 'upload_date': '20131014',
+ 'duration': 606.76,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Seems to be broken',
+ },
+ {
+ 'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge',
+ 'info_dict': {
+ 'id': '2429369',
+ 'ext': 'mp4',
+ 'title': 'Countdown für die Autowerkstatt',
+ 'description': 'md5:809fc051a457b5d8666013bc40698817',
+ 'upload_date': '20140223',
+ 'duration': 2595.04,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
+ },
+ {
+ 'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
+ 'info_dict': {
+ 'id': '2904997',
+ 'ext': 'mp4',
+ 'title': 'Sexy laufen in Ugg Boots',
+ 'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6',
+ 'upload_date': '20140122',
+ 'duration': 245.32,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
+ },
+ {
+ 'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
+ 'info_dict': {
+ 'id': '2906572',
+ 'ext': 'mp4',
+ 'title': 'Im Interview: Kai Wiesinger',
+ 'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
+ 'upload_date': '20140203',
+ 'duration': 522.56,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
+ },
+ {
+ 'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
+ 'info_dict': {
+ 'id': '2992323',
+ 'ext': 'mp4',
+ 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
+ 'description': 'md5:2669cde3febe9bce13904f701e774eb6',
+ 'upload_date': '20141014',
+ 'duration': 2410.44,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
+ },
+ {
+ 'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
+ 'info_dict': {
+ 'id': '3004256',
+ 'ext': 'mp4',
+ 'title': 'Schalke: Tönnies möchte Raul zurück',
+ 'description': 'md5:4b5b271d9bcde223b54390754c8ece3f',
+ 'upload_date': '20140226',
+ 'duration': 228.96,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
+ },
+ {
+ 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
+ 'info_dict': {
+ 'id': '2572814',
+ 'ext': 'mp4',
+ 'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man',
+ 'description': 'md5:6ddb02b0781c6adf778afea606652e38',
+ 'timestamp': 1382041620,
+ 'upload_date': '20131017',
+ 'duration': 469.88,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag',
+ 'info_dict': {
+ 'id': '2156342',
+ 'ext': 'mp4',
+ 'title': 'Kurztrips zum Valentinstag',
+ 'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
+ 'duration': 307.24,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
+ 'info_dict': {
+ 'id': '439664',
+ 'title': 'Episode 8 - Ganze Folge - Playlist',
+ 'description': 'md5:63b8963e71f481782aeea877658dec84',
+ },
+ 'playlist_count': 2,
+ 'skip': 'This video is unavailable',
+ },
+ {
+ # title in <h2 class="subtitle">
+ 'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
+ 'info_dict': {
+ 'id': '4895826',
+ 'ext': 'mp4',
+ 'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe',
+ 'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9',
+ 'upload_date': '20170302',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'geo restricted to Germany',
+ },
+ {
+ # geo restricted to Germany
+ 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge',
+ 'only_matching': True,
+ },
+ {
+ # geo restricted to Germany
+ 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
+ 'only_matching': True,
+ },
+ {
+ # geo restricted to Germany
+ 'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage',
+ 'only_matching': True,
+ },
+ ]
+
+ _TOKEN = 'prosieben'
+ _SALT = '01!8d8F_)r9]4s[qeuXfP%'
+ _CLIENT_NAME = 'kolibri-2.0.19-splec4'
+
+ _ACCESS_ID = 'x_prosiebenmaxx-de'
+ _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag'
+ _IV = 'Aeluchoc6aevechuipiexeeboowedaok'
+
+ _CLIPID_REGEXES = [
+ r'"clip_id"\s*:\s+"(\d+)"',
+ r'clipid: "(\d+)"',
+ r'clip[iI]d=(\d+)',
+ r'clip[iI][dD]\s*=\s*["\'](\d+)',
+ r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
+ r'proMamsId&quot;\s*:\s*&quot;(\d+)',
+ r'proMamsId"\s*:\s*"(\d+)',
+ ]
+ _TITLE_REGEXES = [
+ r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
+ r'<header class="clearfix">\s*<h3>(.+?)</h3>',
+ r'<!-- start video -->\s*<h1>(.+?)</h1>',
+ r'<h1 class="att-name">\s*(.+?)</h1>',
+ r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
+ r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
+ r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
+ r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>',
+ ]
+ _DESCRIPTION_REGEXES = [
+ r'<p itemprop="description">\s*(.+?)</p>',
+ r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
+ r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
+ r'<p class="att-description">\s*(.+?)\s*</p>',
+ r'<p class="video-description" itemprop="description">\s*(.+?)</p>',
+ r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
+ ]
+ _UPLOAD_DATE_REGEXES = [
+ r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
+ r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
+ r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
+ r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
+ ]
+ _PAGE_TYPE_REGEXES = [
+ r'<meta name="page_type" content="([^"]+)">',
+ r"'itemType'\s*:\s*'([^']*)'",
+ ]
+ _PLAYLIST_ID_REGEXES = [
+ r'content[iI]d=(\d+)',
+ r"'itemId'\s*:\s*'([^']*)'",
+ ]
+ _PLAYLIST_CLIP_REGEXES = [
+ r'(?s)data-qvt=.+?<a href="([^"]+)"',
+ ]
+
+ def _extract_clip(self, url, webpage):
+ clip_id = self._html_search_regex(
+ self._CLIPID_REGEXES, webpage, 'clip id')
+ title = self._html_search_regex(
+ self._TITLE_REGEXES, webpage, 'title',
+ default=None) or self._og_search_title(webpage)
+ info = self._extract_video_info(url, clip_id)
+ description = self._html_search_regex(
+ self._DESCRIPTION_REGEXES, webpage, 'description', default=None)
+ if description is None:
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+ upload_date = unified_strdate(
+ self._html_search_meta('og:published_time', webpage,
+ 'upload date', default=None)
+ or self._html_search_regex(self._UPLOAD_DATE_REGEXES,
+ webpage, 'upload date', default=None))
+
+ json_ld = self._search_json_ld(webpage, clip_id, default={})
+
+ return merge_dicts(info, {
+ 'id': clip_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ }, json_ld)
+
+ def _extract_playlist(self, url, webpage):
+ playlist_id = self._html_search_regex(
+ self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
+ playlist = self._parse_json(
+ self._search_regex(
+ r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
+ webpage, 'playlist'),
+ playlist_id)
+ entries = []
+ for item in playlist:
+ clip_id = item.get('id') or item.get('upc')
+ if not clip_id:
+ continue
+ info = self._extract_video_info(url, clip_id)
+ info.update({
+ 'id': clip_id,
+ 'title': item.get('title') or item.get('teaser', {}).get('headline'),
+ 'description': item.get('teaser', {}).get('description'),
+ 'thumbnail': item.get('poster'),
+ 'duration': float_or_none(item.get('duration')),
+ 'series': item.get('tvShowTitle'),
+ 'uploader': item.get('broadcastPublisher'),
+ })
+ entries.append(info)
+ return self.playlist_result(entries, playlist_id)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ page_type = self._search_regex(
+ self._PAGE_TYPE_REGEXES, webpage,
+ 'page type', default='clip').lower()
+ if page_type == 'clip':
+ return self._extract_clip(url, webpage)
+ elif page_type == 'playlist':
+ return self._extract_playlist(url, webpage)
+ else:
+ raise ExtractorError(
+ 'Unsupported page type %s' % page_type, expected=True)
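
Both API generations above sign requests with a SHA-1 digest over concatenated secrets: the v4 client_token hashes encryption key + clip id + IV + access id (and, for the urls call, additionally the server token and the supported-protocols string), while the v2 client_id prepends the first two salt characters to a salted hash. A sketch of the v4 derivation with dummy stand-ins for the class attributes:

```python
from hashlib import sha1

# Dummy values; the real ones are the _ENCRYPTION_KEY/_IV/_ACCESS_ID
# class attributes above
ENCRYPTION_KEY = 'k' * 32
IV = 'i' * 32
ACCESS_ID = 'x_example-de'
SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
clip_id = '2104602'

raw_ct = ENCRYPTION_KEY + clip_id + IV + ACCESS_ID
protocols_token = sha1(raw_ct.encode()).hexdigest()

# The subsequent urls call widens the hashed material with the
# server_token returned by the protocols call
server_token = 'dummy-server-token'
urls_token = sha1(
    (raw_ct + server_token + SUPPORTED_PROTOCOLS).encode()).hexdigest()

print(protocols_token)
print(urls_token)
```
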
diff --git a/hypervideo_dl/extractor/puhutv.py b/hypervideo_dl/extractor/puhutv.py
new file mode 100644
index 0000000..ca71665
--- /dev/null
+++ b/hypervideo_dl/extractor/puhutv.py
@@ -0,0 +1,239 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ parse_resolution,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+ urljoin,
+)
+
+
+class PuhuTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
+ IE_NAME = 'puhutv'
+ _TESTS = [{
+ # film
+ 'url': 'https://puhutv.com/sut-kardesler-izle',
+ 'md5': 'a347470371d56e1585d1b2c8dab01c96',
+ 'info_dict': {
+ 'id': '5085',
+ 'display_id': 'sut-kardesler',
+ 'ext': 'mp4',
+ 'title': 'Süt Kardeşler',
+ 'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 4832.44,
+ 'creator': 'Arzu Film',
+ 'timestamp': 1561062602,
+ 'upload_date': '20190620',
+ 'release_year': 1976,
+ 'view_count': int,
+ 'tags': list,
+ },
+ }, {
+ # episode, geo restricted, bypassable with --geo-verification-proxy
+ 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
+ 'only_matching': True,
+ }, {
+ # 4k, with subtitles
+ 'url': 'https://puhutv.com/dip-1-bolum-izle',
+ 'only_matching': True,
+ }]
+ _SUBTITLE_LANGS = {
+ 'English': 'en',
+ 'Deutsch': 'de',
+ 'عربى': 'ar'
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ info = self._download_json(
+ urljoin(url, '/api/slug/%s-izle' % display_id),
+ display_id)['data']
+
+ video_id = compat_str(info['id'])
+ show = info.get('title') or {}
+ title = info.get('name') or show['name']
+ if info.get('display_name'):
+ title = '%s %s' % (title, info['display_name'])
+
+ try:
+ videos = self._download_json(
+ 'https://puhutv.com/api/assets/%s/videos' % video_id,
+ display_id, 'Downloading video JSON',
+ headers=self.geo_verification_headers())
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ self.raise_geo_restricted()
+ raise
+
+ urls = []
+ formats = []
+
+ for video in videos['data']['videos']:
+ media_url = url_or_none(video.get('url'))
+ if not media_url or media_url in urls:
+ continue
+ urls.append(media_url)
+
+ playlist = video.get('is_playlist')
+ if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
+ formats.extend(self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+
+ quality = int_or_none(video.get('quality'))
+ f = {
+ 'url': media_url,
+ 'ext': 'mp4',
+ 'height': quality
+ }
+ video_format = video.get('video_format')
+ is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False
+ if is_hls:
+ format_id = 'hls'
+ f['protocol'] = 'm3u8_native'
+ elif video_format == 'mp4':
+ format_id = 'http'
+ else:
+ continue
+ if quality:
+ format_id += '-%sp' % quality
+ f['format_id'] = format_id
+ formats.append(f)
+ self._sort_formats(formats)
+
+ creator = try_get(
+ show, lambda x: x['producer']['name'], compat_str)
+
+ content = info.get('content') or {}
+
+ images = try_get(
+ content, lambda x: x['images']['wide'], dict) or {}
+ thumbnails = []
+ for image_id, image_url in images.items():
+ if not isinstance(image_url, compat_str):
+ continue
+ if not image_url.startswith(('http', '//')):
+ image_url = 'https://%s' % image_url
+ t = parse_resolution(image_id)
+ t.update({
+ 'id': image_id,
+ 'url': image_url
+ })
+ thumbnails.append(t)
+
+ tags = []
+ for genre in show.get('genres') or []:
+ if not isinstance(genre, dict):
+ continue
+ genre_name = genre.get('name')
+ if genre_name and isinstance(genre_name, compat_str):
+ tags.append(genre_name)
+
+ subtitles = {}
+ for subtitle in content.get('subtitles') or []:
+ if not isinstance(subtitle, dict):
+ continue
+ lang = subtitle.get('language')
+ sub_url = url_or_none(subtitle.get('url') or subtitle.get('file'))
+ if not lang or not isinstance(lang, compat_str) or not sub_url:
+ continue
+ subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
+ 'url': sub_url
+ }]
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': info.get('description') or show.get('description'),
+ 'season_id': str_or_none(info.get('season_id')),
+ 'season_number': int_or_none(info.get('season_number')),
+ 'episode_number': int_or_none(info.get('episode_number')),
+ 'release_year': int_or_none(show.get('released_at')),
+ 'timestamp': unified_timestamp(info.get('created_at')),
+ 'creator': creator,
+ 'view_count': int_or_none(content.get('watch_count')),
+ 'duration': float_or_none(content.get('duration_in_ms'), 1000),
+ 'tags': tags,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'formats': formats
+ }
+
+
+class PuhuTVSerieIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
+ IE_NAME = 'puhutv:serie'
+ _TESTS = [{
+ 'url': 'https://puhutv.com/deniz-yildizi-detay',
+ 'info_dict': {
+ 'title': 'Deniz Yıldızı',
+ 'id': 'deniz-yildizi',
+ },
+ 'playlist_mincount': 205,
+ }, {
+        # a film detail page that uses the same URL pattern as a series page
+ 'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
+ 'only_matching': True,
+ }]
+
+ def _extract_entries(self, seasons):
+ for season in seasons:
+ season_id = season.get('id')
+ if not season_id:
+ continue
+ page = 1
+ has_more = True
+ while has_more is True:
+ season = self._download_json(
+ 'https://galadriel.puhutv.com/seasons/%s' % season_id,
+ season_id, 'Downloading page %s' % page, query={
+ 'page': page,
+ 'per': 40,
+ })
+ episodes = season.get('episodes')
+ if isinstance(episodes, list):
+ for ep in episodes:
+ slug_path = str_or_none(ep.get('slugPath'))
+ if not slug_path:
+ continue
+ video_id = str_or_none(int_or_none(ep.get('id')))
+ yield self.url_result(
+ 'https://puhutv.com/%s' % slug_path,
+ ie=PuhuTVIE.ie_key(), video_id=video_id,
+ video_title=ep.get('name') or ep.get('eventLabel'))
+ page += 1
+ has_more = season.get('hasMore')
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ info = self._download_json(
+ urljoin(url, '/api/slug/%s-detay' % playlist_id),
+ playlist_id)['data']
+
+ seasons = info.get('seasons')
+ if seasons:
+ return self.playlist_result(
+ self._extract_entries(seasons), playlist_id, info.get('name'))
+
+    # Films use the same URL pattern as series detail pages
+ video_id = info.get('slug') or info['assets'][0]['slug']
+ return self.url_result(
+ 'https://puhutv.com/%s-izle' % video_id,
+ PuhuTVIE.ie_key(), video_id)
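
_extract_entries above walks each season with page/per query pagination, yielding episode URLs until the API stops reporting hasMore. The same loop shape, reduced to a self-contained sketch with a toy fetch function standing in for the season JSON requests:

```python
def paginated(fetch, per=40):
    # fetch(page, per) must return a dict with 'episodes' (list)
    # and 'hasMore' (bool), like the galadriel season endpoint above
    page = 1
    while True:
        data = fetch(page, per)
        for episode in data.get('episodes') or []:
            yield episode
        if not data.get('hasMore'):
            break
        page += 1


def fake_fetch(page, per):
    # Two pages of three fake episode slugs each
    return {'episodes': ['s1e%d' % (3 * (page - 1) + i) for i in range(1, 4)],
            'hasMore': page < 2}


print(list(paginated(fake_fetch)))  # ['s1e1', 's1e2', ..., 's1e6']
```
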
diff --git a/hypervideo_dl/extractor/puls4.py b/hypervideo_dl/extractor/puls4.py
new file mode 100644
index 0000000..80091b8
--- /dev/null
+++ b/hypervideo_dl/extractor/puls4.py
@@ -0,0 +1,57 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .prosiebensat1 import ProSiebenSat1BaseIE
+from ..utils import (
+ unified_strdate,
+ parse_duration,
+ compat_str,
+)
+
+
+class Puls4IE(ProSiebenSat1BaseIE):
+ _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118',
+ 'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03',
+ 'info_dict': {
+ 'id': '118118',
+ 'ext': 'flv',
+ 'title': 'Tobias Homberger von myclubs im #2min2miotalk',
+ 'description': 'md5:f9def7c5e8745d6026d8885487d91955',
+ 'upload_date': '20160830',
+ 'uploader': 'PULS_4',
+ },
+ }, {
+ 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598',
+ 'only_matching': True,
+ }]
+ _TOKEN = 'puls4'
+ _SALT = '01!kaNgaiNgah1Ie4AeSha'
+ _CLIENT_NAME = ''
+
+ def _real_extract(self, url):
+ path = self._match_id(url)
+ content_path = self._download_json(
+ 'http://www.puls4.com/api/json-fe/page/' + path, path)['content'][0]['url']
+ media = self._download_json(
+ 'http://www.puls4.com' + content_path,
+ content_path)['mediaCurrent']
+ player_content = media['playerContent']
+ info = self._extract_video_info(url, player_content['id'])
+ info.update({
+ 'id': compat_str(media['objectId']),
+ 'title': player_content['title'],
+ 'description': media.get('description'),
+ 'thumbnail': media.get('previewLink'),
+ 'upload_date': unified_strdate(media.get('date')),
+ 'duration': parse_duration(player_content.get('duration')),
+ 'episode': player_content.get('episodePartName'),
+ 'show': media.get('channel'),
+ 'season_id': player_content.get('seasonId'),
+ 'uploader': player_content.get('sourceCompany'),
+ })
+ return info
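
Puls4IE reuses the whole ProSiebenSat.1 format machinery simply by overriding the class-level platform credentials (_TOKEN, _SALT, _CLIENT_NAME) and supplying its own _real_extract to resolve a page path to a clip id. A hypothetical further site on the same platform would follow the same shape (site, URLs, token and salt below are all invented):

```python
from .prosiebensat1 import ProSiebenSat1BaseIE


class ExampleTVIE(ProSiebenSat1BaseIE):
    _VALID_URL = r'https?://(?:www\.)?exampletv\.example/(?P<id>[^?#&]+)'
    _TOKEN = 'exampletv'
    _SALT = '01!not-a-real-salt'
    _CLIENT_NAME = ''

    def _real_extract(self, url):
        path = self._match_id(url)
        # Resolve the site's own page JSON to a clip id (hypothetical
        # endpoint and field), then hand off to the shared extraction
        clip_id = self._download_json(
            'https://exampletv.example/api/page/' + path, path)['clipId']
        info = self._extract_video_info(url, clip_id)
        info['id'] = clip_id
        return info
```
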
diff --git a/hypervideo_dl/extractor/pyvideo.py b/hypervideo_dl/extractor/pyvideo.py
new file mode 100644
index 0000000..b8ac93a
--- /dev/null
+++ b/hypervideo_dl/extractor/pyvideo.py
@@ -0,0 +1,72 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class PyvideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)'
+
+ _TESTS = [{
+ 'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html',
+ 'info_dict': {
+ 'id': 'become-a-logging-expert-in-30-minutes',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html',
+ 'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
+ 'info_dict': {
+ 'id': '2542',
+ 'ext': 'm4v',
+ 'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v',
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ category = mobj.group('category')
+ video_id = mobj.group('id')
+
+ entries = []
+
+ data = self._download_json(
+ 'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json'
+ % (category, video_id), video_id, fatal=False)
+
+ if data:
+ for video in data['videos']:
+ video_url = video.get('url')
+ if video_url:
+ if video.get('type') == 'youtube':
+ entries.append(self.url_result(video_url, 'Youtube'))
+ else:
+ entries.append({
+ 'id': compat_str(data.get('id') or video_id),
+ 'url': video_url,
+ 'title': data['title'],
+ 'description': data.get('description') or data.get('summary'),
+ 'thumbnail': data.get('thumbnail_url'),
+ 'duration': int_or_none(data.get('duration')),
+ })
+ else:
+ webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(webpage)
+ media_urls = self._search_regex(
+ r'(?s)Media URL:(.+?)</li>', webpage, 'media urls')
+ for m in re.finditer(
+ r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls):
+ media_url = m.group('url')
+ if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url):
+ entries.append(self.url_result(media_url, 'Youtube'))
+ else:
+ entries.append({
+ 'id': video_id,
+ 'url': media_url,
+ 'title': title,
+ })
+
+ return self.playlist_result(entries, video_id)
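
The pyvideo extractor above prefers the structured JSON in the pyvideo/data GitHub repository (downloaded with fatal=False, so a missing file yields None instead of an error) and only scrapes the HTML page as a fallback. The control flow, boiled down to the standard library, using the category and id from the first test case:

```python
import json
import urllib.error
import urllib.request


def load_pyvideo_json(category, video_id):
    # fatal=False equivalent: swallow HTTP errors and signal "no data"
    url = ('https://raw.githubusercontent.com/pyvideo/data/master/'
           '%s/videos/%s.json' % (category, video_id))
    try:
        with urllib.request.urlopen(url) as resp:
            return json.loads(resp.read().decode('utf-8'))
    except urllib.error.HTTPError:
        return None


data = load_pyvideo_json('pycon-us-2013',
                         'become-a-logging-expert-in-30-minutes')
if data:
    print([video.get('url') for video in data['videos']])
else:
    print('would fall back to scraping the HTML page')
```
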
diff --git a/hypervideo_dl/extractor/qqmusic.py b/hypervideo_dl/extractor/qqmusic.py
new file mode 100644
index 0000000..084308a
--- /dev/null
+++ b/hypervideo_dl/extractor/qqmusic.py
@@ -0,0 +1,369 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import re
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ strip_jsonp,
+ unescapeHTML,
+)
+
+
+class QQMusicIE(InfoExtractor):
+ IE_NAME = 'qqmusic'
+ IE_DESC = 'QQ音乐'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html'
+ _TESTS = [{
+ 'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html',
+ 'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8',
+ 'info_dict': {
+ 'id': '004295Et37taLD',
+ 'ext': 'mp3',
+ 'title': '可惜没如果',
+ 'release_date': '20141227',
+ 'creator': '林俊杰',
+ 'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'note': 'There is no mp3-320 version of this song.',
+ 'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html',
+ 'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
+ 'info_dict': {
+ 'id': '004MsGEo3DdNxV',
+ 'ext': 'mp3',
+ 'title': '如果',
+ 'release_date': '20050626',
+ 'creator': '李季美',
+ 'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'note': 'lyrics not in .lrc format',
+ 'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html',
+ 'info_dict': {
+ 'id': '001JyApY11tIp6',
+ 'ext': 'mp3',
+ 'title': 'Shadows Over Transylvania',
+ 'release_date': '19970225',
+ 'creator': 'Dark Funeral',
+ 'description': 'md5:c9b20210587cbcd6836a1c597bab4525',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ _FORMATS = {
+ 'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
+ 'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
+ 'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10}
+ }
+
+ # Reference: m_r_GetRUin() in top_player.js
+ # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
+ @staticmethod
+ def m_r_get_ruin():
+ curMs = int(time.time() * 1000) % 1000
+ return int(round(random.random() * 2147483647) * curMs % 1E10)
+
+ def _real_extract(self, url):
+ mid = self._match_id(url)
+
+ detail_info_page = self._download_webpage(
+ 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
+ mid, note='Download song detail info',
+ errnote='Unable to get song detail info', encoding='gbk')
+
+ song_name = self._html_search_regex(
+ r"songname:\s*'([^']+)'", detail_info_page, 'song name')
+
+ publish_time = self._html_search_regex(
+ r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
+ 'publish time', default=None)
+ if publish_time:
+ publish_time = publish_time.replace('-', '')
+
+ singer = self._html_search_regex(
+ r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
+
+ lrc_content = self._html_search_regex(
+ r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
+ detail_info_page, 'LRC lyrics', default=None)
+ if lrc_content:
+ lrc_content = lrc_content.replace('\\n', '\n')
+
+ thumbnail_url = None
+ albummid = self._search_regex(
+ [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
+ detail_info_page, 'album mid', default=None)
+ if albummid:
+ thumbnail_url = 'http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg' \
+ % (albummid[-2:-1], albummid[-1], albummid)
+
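+        # the guid sent along when requesting the vkey must also be embedded
+        # in the final stream URLs, so it is generated once and reused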
+ guid = self.m_r_get_ruin()
+
+ vkey = self._download_json(
+ 'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
+ mid, note='Retrieve vkey', errnote='Unable to get vkey',
+ transform_source=strip_jsonp)['key']
+
+ formats = []
+ for format_id, details in self._FORMATS.items():
+ formats.append({
+ 'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0'
+ % (details['prefix'], mid, details['ext'], vkey, guid),
+ 'format': format_id,
+ 'format_id': format_id,
+ 'preference': details['preference'],
+ 'abr': details.get('abr'),
+ })
+ self._check_formats(formats, mid)
+ self._sort_formats(formats)
+
+        # lrc_content may be None when the page carries no lyrics block;
+        # guard it before handing it to re.findall()
+        actual_lrc_lyrics = ''
+        if lrc_content:
+            actual_lrc_lyrics = ''.join(
+                line + '\n' for line in re.findall(
+                    r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content))
+
+ info_dict = {
+ 'id': mid,
+ 'formats': formats,
+ 'title': song_name,
+ 'release_date': publish_time,
+ 'creator': singer,
+ 'description': lrc_content,
+ 'thumbnail': thumbnail_url
+ }
+ if actual_lrc_lyrics:
+ info_dict['subtitles'] = {
+ 'origin': [{
+ 'ext': 'lrc',
+ 'data': actual_lrc_lyrics,
+ }]
+ }
+ return info_dict
+
+
+class QQPlaylistBaseIE(InfoExtractor):
+ @staticmethod
+ def qq_static_url(category, mid):
+ return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
+
+ def get_singer_all_songs(self, singmid, num):
+ return self._download_webpage(
+ r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid,
+ query={
+ 'format': 'json',
+ 'inCharset': 'utf8',
+ 'outCharset': 'utf-8',
+ 'platform': 'yqq',
+ 'needNewCode': 0,
+ 'singermid': singmid,
+ 'order': 'listen',
+ 'begin': 0,
+ 'num': num,
+ 'songstatus': 1,
+ })
+
+ def get_entries_from_page(self, singmid):
+ entries = []
+
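+        # request a single song first only to learn the total count, then
+        # fetch the singer's complete track list in a second call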
+ default_num = 1
+ json_text = self.get_singer_all_songs(singmid, default_num)
+ json_obj_all_songs = self._parse_json(json_text, singmid)
+
+ if json_obj_all_songs['code'] == 0:
+ total = json_obj_all_songs['data']['total']
+ json_text = self.get_singer_all_songs(singmid, total)
+ json_obj_all_songs = self._parse_json(json_text, singmid)
+
+ for item in json_obj_all_songs['data']['list']:
+ if item['musicData'].get('songmid') is not None:
+ songmid = item['musicData']['songmid']
+ entries.append(self.url_result(
+ r'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid))
+
+ return entries
+
+
+class QQMusicSingerIE(QQPlaylistBaseIE):
+ IE_NAME = 'qqmusic:singer'
+ IE_DESC = 'QQ音乐 - 歌手'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P<id>[0-9A-Za-z]+)\.html'
+ _TEST = {
+ 'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html',
+ 'info_dict': {
+ 'id': '001BLpXF2DyJe2',
+ 'title': '林俊杰',
+ 'description': 'md5:870ec08f7d8547c29c93010899103751',
+ },
+ 'playlist_mincount': 12,
+ }
+
+ def _real_extract(self, url):
+ mid = self._match_id(url)
+
+ entries = self.get_entries_from_page(mid)
+ singer_page = self._download_webpage(url, mid, 'Download singer page')
+ singer_name = self._html_search_regex(
+ r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None)
+ singer_desc = None
+
+ if mid:
+ singer_desc_page = self._download_xml(
+ 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid,
+            'Download singer description XML',
+ query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid},
+ headers={'Referer': 'https://y.qq.com/n/yqq/singer/'})
+
+ singer_desc = singer_desc_page.find('./data/info/desc').text
+
+ return self.playlist_result(entries, mid, singer_name, singer_desc)
+
+
+class QQMusicAlbumIE(QQPlaylistBaseIE):
+ IE_NAME = 'qqmusic:album'
+ IE_DESC = 'QQ音乐 - 专辑'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html'
+
+ _TESTS = [{
+ 'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html',
+ 'info_dict': {
+ 'id': '000gXCTb2AhRR1',
+ 'title': '我们都是这样长大的',
+ 'description': 'md5:179c5dce203a5931970d306aa9607ea6',
+ },
+ 'playlist_count': 4,
+ }, {
+ 'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html',
+ 'info_dict': {
+ 'id': '002Y5a3b3AlCu3',
+ 'title': '그리고...',
+ 'description': 'md5:a48823755615508a95080e81b51ba729',
+ },
+ 'playlist_count': 8,
+ }]
+
+ def _real_extract(self, url):
+ mid = self._match_id(url)
+
+ album = self._download_json(
+ 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid,
+ mid, 'Download album page')['data']
+
+ entries = [
+ self.url_result(
+ 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']
+ ) for song in album['list']
+ ]
+ album_name = album.get('name')
+ album_detail = album.get('desc')
+ if album_detail is not None:
+ album_detail = album_detail.strip()
+
+ return self.playlist_result(entries, mid, album_name, album_detail)
+
+
+class QQMusicToplistIE(QQPlaylistBaseIE):
+ IE_NAME = 'qqmusic:toplist'
+ IE_DESC = 'QQ音乐 - 排行榜'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html'
+
+ _TESTS = [{
+ 'url': 'https://y.qq.com/n/yqq/toplist/123.html',
+ 'info_dict': {
+ 'id': '123',
+ 'title': '美国iTunes榜',
+ 'description': 'md5:89db2335fdbb10678dee2d43fe9aba08',
+ },
+ 'playlist_count': 100,
+ }, {
+ 'url': 'https://y.qq.com/n/yqq/toplist/3.html',
+ 'info_dict': {
+ 'id': '3',
+ 'title': '巅峰榜·欧美',
+ 'description': 'md5:5a600d42c01696b26b71f8c4d43407da',
+ },
+ 'playlist_count': 100,
+ }, {
+ 'url': 'https://y.qq.com/n/yqq/toplist/106.html',
+ 'info_dict': {
+ 'id': '106',
+ 'title': '韩国Mnet榜',
+ 'description': 'md5:cb84b325215e1d21708c615cac82a6e7',
+ },
+ 'playlist_count': 50,
+ }]
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ toplist_json = self._download_json(
+ 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id,
+ note='Download toplist page',
+ query={'type': 'toplist', 'topid': list_id, 'format': 'json'})
+
+ entries = [self.url_result(
+ 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic',
+ song['data']['songmid'])
+ for song in toplist_json['songlist']]
+
+ topinfo = toplist_json.get('topinfo', {})
+ list_name = topinfo.get('ListName')
+ list_description = topinfo.get('info')
+ return self.playlist_result(entries, list_id, list_name, list_description)
+
+
+class QQMusicPlaylistIE(QQPlaylistBaseIE):
+ IE_NAME = 'qqmusic:playlist'
+ IE_DESC = 'QQ音乐 - 歌单'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html'
+
+ _TESTS = [{
+ 'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html',
+ 'info_dict': {
+ 'id': '3462654915',
+ 'title': '韩国5月新歌精选下旬',
+ 'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',
+ },
+ 'playlist_count': 40,
+ 'skip': 'playlist gone',
+ }, {
+ 'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html',
+ 'info_dict': {
+ 'id': '1374105607',
+ 'title': '易入人心的华语民谣',
+ 'description': '民谣的歌曲易于传唱、、歌词朗朗伤口、旋律简单温馨。属于那种才入耳孔。却上心头的感觉。没有太多的复杂情绪。简单而直接地表达乐者的情绪,就是这样的简单才易入人心。',
+ },
+ 'playlist_count': 20,
+ }]
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ list_json = self._download_json(
+ 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg',
+ list_id, 'Download list page',
+ query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id},
+ transform_source=strip_jsonp)
+ if not len(list_json.get('cdlist', [])):
+ if list_json.get('code'):
+ raise ExtractorError(
+ 'QQ Music said: error %d in fetching playlist info' % list_json['code'],
+ expected=True)
+ raise ExtractorError('Unable to get playlist info')
+
+ cdlist = list_json['cdlist'][0]
+ entries = [self.url_result(
+ 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'])
+ for song in cdlist['songlist']]
+
+ list_name = cdlist.get('dissname')
+ list_description = clean_html(unescapeHTML(cdlist.get('desc')))
+ return self.playlist_result(entries, list_id, list_name, list_description)
diff --git a/hypervideo_dl/extractor/r7.py b/hypervideo_dl/extractor/r7.py
new file mode 100644
index 0000000..e2202d6
--- /dev/null
+++ b/hypervideo_dl/extractor/r7.py
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class R7IE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/|
+ noticias\.r7\.com(?:/[^/]+)+/[^/]+-|
+ player\.r7\.com/video/i/
+ )
+ (?P<id>[\da-f]{24})
+ '''
+ _TESTS = [{
+ 'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html',
+ 'md5': '403c4e393617e8e8ddc748978ee8efde',
+ 'info_dict': {
+ 'id': '54e7050b0cf2ff57e0279389',
+ 'ext': 'mp4',
+ 'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
+ 'description': 'md5:01812008664be76a6479aa58ec865b72',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 98,
+ 'like_count': int,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://player-api.r7.com/video/i/%s' % video_id, video_id)
+
+ title = video['title']
+
+ formats = []
+ media_url_hls = video.get('media_url_hls')
+ if media_url_hls:
+ formats.extend(self._extract_m3u8_formats(
+ media_url_hls, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ media_url = video.get('media_url')
+ if media_url:
+ f = {
+ 'url': media_url,
+ 'format_id': 'http',
+ }
+ # m3u8 format always matches the http format, let's copy metadata from
+ # one to another
+ m3u8_formats = list(filter(
+ lambda f: f.get('vcodec') != 'none', formats))
+ if len(m3u8_formats) == 1:
+ f_copy = m3u8_formats[0].copy()
+ f_copy.update(f)
+ f_copy['protocol'] = 'http'
+ f = f_copy
+ formats.append(f)
+ self._sort_formats(formats)
+
+ description = video.get('description')
+ thumbnail = video.get('thumb')
+ duration = int_or_none(video.get('media_duration'))
+ like_count = int_or_none(video.get('likes'))
+ view_count = int_or_none(video.get('views'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'like_count': like_count,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
+
+
+class R7ArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[a-zA-Z]+)\.r7\.com/(?:[^/]+/)+[^/?#&]+-(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://tv.r7.com/record-play/balanco-geral/videos/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-16102015',
+ 'only_matching': True,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if R7IE.suitable(url) else super(R7ArticleIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})',
+ webpage, 'video id')
+
+ return self.url_result('http://player.r7.com/video/i/%s' % video_id, R7IE.ie_key())
diff --git a/hypervideo_dl/extractor/radiobremen.py b/hypervideo_dl/extractor/radiobremen.py
new file mode 100644
index 0000000..2c35f98
--- /dev/null
+++ b/hypervideo_dl/extractor/radiobremen.py
@@ -0,0 +1,65 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class RadioBremenIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)'
+ IE_NAME = 'radiobremen'
+
+ _TEST = {
+ 'url': 'http://www.radiobremen.de/mediathek/?id=141876',
+ 'info_dict': {
+ 'id': '141876',
+ 'ext': 'mp4',
+ 'duration': 178,
+ 'width': 512,
+ 'title': 'Druck auf Patrick Öztürk',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'description': 'Gegen den SPD-Bürgerschaftsabgeordneten Patrick Öztürk wird wegen Beihilfe zum gewerbsmäßigen Betrug ermittelt. Am Donnerstagabend sollte er dem Vorstand des SPD-Unterbezirks Bremerhaven dazu Rede und Antwort stehen.',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ meta_url = 'http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s' % video_id
+ meta_doc = self._download_webpage(
+ meta_url, video_id, 'Downloading metadata')
+ title = self._html_search_regex(
+ r'<h1.*>(?P<title>.+)</h1>', meta_doc, 'title')
+ description = self._html_search_regex(
+ r'<p>(?P<description>.*)</p>', meta_doc, 'description', fatal=False)
+ duration = parse_duration(self._html_search_regex(
+ r'L&auml;nge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>',
+ meta_doc, 'duration', fatal=False))
+
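+        # the mp4 URL is not part of the metadata document; it has to be
+        # reassembled from the arguments of the player call on the page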
+ page_doc = self._download_webpage(
+ url, video_id, 'Downloading video information')
+ mobj = re.search(
+ r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)",
+ page_doc)
+ video_url = (
+ "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" %
+ (video_id, video_id, mobj.group("secret"), mobj.group('width')))
+
+ formats = [{
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'width': int(mobj.group('width')),
+ }]
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'formats': formats,
+ 'thumbnail': mobj.group('thumbnail'),
+ }
diff --git a/hypervideo_dl/extractor/radiocanada.py b/hypervideo_dl/extractor/radiocanada.py
new file mode 100644
index 0000000..a28b1a2
--- /dev/null
+++ b/hypervideo_dl/extractor/radiocanada.py
@@ -0,0 +1,172 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ unified_strdate,
+)
+
+
+class RadioCanadaIE(InfoExtractor):
+ IE_NAME = 'radiocanada'
+ _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
+ _TESTS = [
+ {
+ 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
+ 'info_dict': {
+ 'id': '7184272',
+ 'ext': 'mp4',
+ 'title': 'Le parcours du tireur capté sur vidéo',
+ 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
+ 'upload_date': '20141023',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ },
+ {
+ # empty Title
+ 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
+ 'info_dict': {
+ 'id': '7754998',
+ 'ext': 'mp4',
+ 'title': 'letelejournal22h',
+ 'description': 'INTEGRALE WEB 22H-TJ',
+ 'upload_date': '20170720',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ # with protectionType but not actually DRM protected
+ 'url': 'radiocanada:toutv:140872',
+ 'info_dict': {
+ 'id': '140872',
+ 'title': 'Épisode 1',
+ 'series': 'District 31',
+ },
+ 'only_matching': True,
+ }
+ ]
+ _GEO_COUNTRIES = ['CA']
+ _access_token = None
+ _claims = None
+
+ def _call_api(self, path, video_id=None, app_code=None, query=None):
+ if not query:
+ query = {}
+ query.update({
+ 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
+ 'output': 'json',
+ })
+ if video_id:
+ query.update({
+ 'appCode': app_code,
+ 'idMedia': video_id,
+ })
+ if self._access_token:
+ query['access_token'] = self._access_token
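+        # 401 and 422 responses carry a JSON body with a readable error message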
+ try:
+ return self._download_json(
+ 'https://services.radio-canada.ca/media/' + path, video_id, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422):
+ data = self._parse_json(e.cause.read().decode(), None)
+ error = data.get('error_description') or data['errorMessage']['text']
+ raise ExtractorError(error, expected=True)
+ raise
+
+ def _extract_info(self, app_code, video_id):
+ metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']
+
+ def get_meta(name):
+ for meta in metas:
+ if meta.get('name') == name:
+ text = meta.get('text')
+ if text:
+ return text
+
+ # protectionType does not necessarily mean the video is DRM protected (see
+ # https://github.com/ytdl-org/youtube-dl/pull/18609).
+ if get_meta('protectionType'):
+ self.report_warning('This video is probably DRM protected.')
+
+ query = {
+ 'connectionType': 'hd',
+ 'deviceType': 'ipad',
+ 'multibitrate': 'true',
+ }
+ if self._claims:
+ query['claims'] = self._claims
+ v_data = self._call_api('validation/v2/', video_id, app_code, query)
+ v_url = v_data.get('url')
+ if not v_url:
+ error = v_data['message']
+ if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
+                self.raise_geo_restricted(error, self._GEO_COUNTRIES)
+ if error == 'Le contenu sélectionné est disponible seulement en premium':
+ self.raise_login_required(error)
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error), expected=True)
+ formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
+ self._sort_formats(formats)
+
+ subtitles = {}
+ closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
+ if closed_caption_url:
+ subtitles['fr'] = [{
+ 'url': closed_caption_url,
+ 'ext': determine_ext(closed_caption_url, 'vtt'),
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': get_meta('Title') or get_meta('AV-nomEmission'),
+ 'description': get_meta('Description') or get_meta('ShortDescription'),
+ 'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
+ 'duration': int_or_none(get_meta('length')),
+ 'series': get_meta('Emission'),
+            'season_number': int_or_none(get_meta('SrcSaison')),
+            'episode_number': int_or_none(get_meta('SrcEpisode')),
+ 'upload_date': unified_strdate(get_meta('Date')),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ return self._extract_info(*re.match(self._VALID_URL, url).groups())
+
+
+class RadioCanadaAudioVideoIE(InfoExtractor):
+ IE_NAME = 'radiocanada:audiovideo'
+ _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
+ 'info_dict': {
+ 'id': '7527184',
+ 'ext': 'mp4',
+ 'title': 'Barack Obama au Vietnam',
+ 'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
+ 'upload_date': '20160523',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
diff --git a/hypervideo_dl/extractor/radiode.py b/hypervideo_dl/extractor/radiode.py
new file mode 100644
index 0000000..2c06c8b
--- /dev/null
+++ b/hypervideo_dl/extractor/radiode.py
@@ -0,0 +1,54 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class RadioDeIE(InfoExtractor):
+ IE_NAME = 'radio.de'
+ _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
+ _TEST = {
+ 'url': 'http://ndr2.radio.de/',
+ 'info_dict': {
+ 'id': 'ndr2',
+ 'ext': 'mp3',
+ 'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:591c49c702db1a33751625ebfb67f273',
+ 'thumbnail': r're:^https?://.*\.png',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ radio_id = self._match_id(url)
+ webpage = self._download_webpage(url, radio_id)
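+        # the station metadata is embedded in the page as a JavaScript object;
+        # cut it out of the stationService config and parse it as JSON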
+ jscode = self._search_regex(
+ r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n",
+ webpage, 'broadcast')
+
+ broadcast = self._parse_json(jscode, radio_id)
+ title = self._live_title(broadcast['name'])
+ description = broadcast.get('description') or broadcast.get('shortDescription')
+ thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100')
+
+ formats = [{
+ 'url': stream['streamUrl'],
+ 'ext': stream['streamContentFormat'].lower(),
+ 'acodec': stream['streamContentFormat'],
+ 'abr': stream['bitRate'],
+ 'asr': stream['sampleRate']
+ } for stream in broadcast['streamUrls']]
+ self._sort_formats(formats)
+
+ return {
+ 'id': radio_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'is_live': True,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/radiofrance.py b/hypervideo_dl/extractor/radiofrance.py
new file mode 100644
index 0000000..a8afc00
--- /dev/null
+++ b/hypervideo_dl/extractor/radiofrance.py
@@ -0,0 +1,61 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class RadioFranceIE(InfoExtractor):
+ _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
+ IE_NAME = 'radiofrance'
+
+ _TEST = {
+ 'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
+ 'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
+ 'info_dict': {
+ 'id': 'one-one',
+ 'ext': 'ogg',
+ 'title': 'One to one',
+ 'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
+ 'uploader': 'Thomas Hercouët',
+ },
+ }
+
+ def _real_extract(self, url):
+ m = re.match(self._VALID_URL, url)
+ video_id = m.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
+ description = self._html_search_regex(
+ r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
+ webpage, 'description', fatal=False)
+ uploader = self._html_search_regex(
+ r'<div class="credit">&nbsp;&nbsp;&copy;&nbsp;(.*?)</div>',
+ webpage, 'uploader', fatal=False)
+
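+        # the jPlayer element lists the audio sources in its data-source
+        # attribute as "format: 'url'" pairs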
+ formats_str = self._html_search_regex(
+ r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
+ webpage, 'audio URLs')
+ formats = [
+ {
+ 'format_id': fm[0],
+ 'url': fm[1],
+ 'vcodec': 'none',
+ 'preference': i,
+ }
+ for i, fm in
+ enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
+ ]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'uploader': uploader,
+ }
diff --git a/hypervideo_dl/extractor/radiojavan.py b/hypervideo_dl/extractor/radiojavan.py
new file mode 100644
index 0000000..3f74f0c
--- /dev/null
+++ b/hypervideo_dl/extractor/radiojavan.py
@@ -0,0 +1,85 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_resolution,
+ str_to_int,
+ unified_strdate,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class RadioJavanIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
+ _TEST = {
+ 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
+ 'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
+ 'info_dict': {
+ 'id': 'chaartaar-ashoobam',
+ 'ext': 'mp4',
+ 'title': 'Chaartaar - Ashoobam',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'upload_date': '20150215',
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
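+        # the host serving the media files is assigned per video via a POST
+        # request; fall back to a known default host if none is returned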
+ download_host = self._download_json(
+ 'https://www.radiojavan.com/videos/video_host', video_id,
+ data=urlencode_postdata({'id': video_id}),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': url,
+ }).get('host', 'https://host1.rjmusicmedia.com')
+
+ webpage = self._download_webpage(url, video_id)
+
+ formats = []
+ for format_id, _, video_path in re.findall(
+ r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
+ webpage):
+ f = parse_resolution(format_id)
+ f.update({
+ 'url': urljoin(download_host, video_path),
+ 'format_id': format_id,
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ title = self._og_search_title(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ upload_date = unified_strdate(self._search_regex(
+ r'class="date_added">Date added: ([^<]+)<',
+ webpage, 'upload date', fatal=False))
+
+ view_count = str_to_int(self._search_regex(
+ r'class="views">Plays: ([\d,]+)',
+ webpage, 'view count', fatal=False))
+ like_count = str_to_int(self._search_regex(
+ r'class="rating">([\d,]+) likes',
+ webpage, 'like count', fatal=False))
+ dislike_count = str_to_int(self._search_regex(
+ r'class="rating">([\d,]+) dislikes',
+ webpage, 'dislike count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/rai.py b/hypervideo_dl/extractor/rai.py
new file mode 100644
index 0000000..67b86fc
--- /dev/null
+++ b/hypervideo_dl/extractor/rai.py
@@ -0,0 +1,492 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urlparse,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ find_xpath_attr,
+ fix_xml_ampersands,
+ GeoRestrictedError,
+ int_or_none,
+ parse_duration,
+ remove_start,
+ strip_or_none,
+ try_get,
+ unified_strdate,
+ unified_timestamp,
+ update_url_query,
+ urljoin,
+ xpath_text,
+)
+
+
+class RaiBaseIE(InfoExtractor):
+ _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
+ _GEO_COUNTRIES = ['IT']
+ _GEO_BYPASS = False
+
+ def _extract_relinker_info(self, relinker_url, video_id):
+ if not re.match(r'https?://', relinker_url):
+ return {'formats': [{'url': relinker_url}]}
+
+ formats = []
+ geoprotection = None
+ is_live = None
+ duration = None
+
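+        # query the relinker once per player platform; m3u8 manifests are only
+        # accepted from 'mon', f4m only from 'flash', anything else over HTTP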
+ for platform in ('mon', 'flash', 'native'):
+ relinker = self._download_xml(
+ relinker_url, video_id,
+ note='Downloading XML metadata for platform %s' % platform,
+ transform_source=fix_xml_ampersands,
+ query={'output': 45, 'pl': platform},
+ headers=self.geo_verification_headers())
+
+ if not geoprotection:
+ geoprotection = xpath_text(
+ relinker, './geoprotection', default=None) == 'Y'
+
+ if not is_live:
+ is_live = xpath_text(
+ relinker, './is_live', default=None) == 'Y'
+ if not duration:
+ duration = parse_duration(xpath_text(
+ relinker, './duration', default=None))
+
+ url_elem = find_xpath_attr(relinker, './url', 'type', 'content')
+ if url_elem is None:
+ continue
+
+ media_url = url_elem.text
+
+ # This does not imply geo restriction (e.g.
+ # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
+ if '/video_no_available.mp4' in media_url:
+ continue
+
+ ext = determine_ext(media_url)
+ if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
+ continue
+
+ if ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon':
+ formats.extend(self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'f4m' or platform == 'flash':
+ manifest_url = update_url_query(
+ media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
+ {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
+ formats.extend(self._extract_f4m_formats(
+ manifest_url, video_id, f4m_id='hds', fatal=False))
+ else:
+                # a missing <bitrate> yields None; coerce it to 0
+                bitrate = int_or_none(xpath_text(relinker, 'bitrate')) or 0
+ formats.append({
+ 'url': media_url,
+ 'tbr': bitrate if bitrate > 0 else None,
+ 'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
+ })
+
+ if not formats and geoprotection is True:
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+
+ return dict((k, v) for k, v in {
+ 'is_live': is_live,
+ 'duration': duration,
+ 'formats': formats,
+ }.items() if v is not None)
+
+ @staticmethod
+ def _extract_subtitles(url, video_data):
+ STL_EXT = 'stl'
+ SRT_EXT = 'srt'
+ subtitles = {}
+ subtitles_array = video_data.get('subtitlesArray') or []
+ for k in ('subtitles', 'subtitlesUrl'):
+ subtitles_array.append({'url': video_data.get(k)})
+ for subtitle in subtitles_array:
+ sub_url = subtitle.get('url')
+ if sub_url and isinstance(sub_url, compat_str):
+ sub_lang = subtitle.get('language') or 'it'
+ sub_url = urljoin(url, sub_url)
+ sub_ext = determine_ext(sub_url, SRT_EXT)
+ subtitles.setdefault(sub_lang, []).append({
+ 'ext': sub_ext,
+ 'url': sub_url,
+ })
+ if STL_EXT == sub_ext:
+ subtitles[sub_lang].append({
+ 'ext': SRT_EXT,
+ 'url': sub_url[:-len(STL_EXT)] + SRT_EXT,
+ })
+ return subtitles
+
+
+class RaiPlayIE(RaiBaseIE):
+ _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE
+ _TESTS = [{
+ 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
+ 'md5': '8970abf8caf8aef4696e7b1f2adfc696',
+ 'info_dict': {
+ 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
+ 'ext': 'mp4',
+ 'title': 'Report del 07/04/2014',
+ 'alt_title': 'St 2013/14 - Espresso nel caffè - 07/04/2014',
+ 'description': 'md5:d730c168a58f4bb35600fc2f881ec04e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Rai Gulp',
+ 'duration': 6160,
+ 'series': 'Report',
+ 'season': '2013/14',
+ 'subtitles': {
+ 'it': 'count:2',
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
+ 'only_matching': True,
+ }, {
+ # subtitles at 'subtitlesArray' key (see #27698)
+ 'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ base, video_id = re.match(self._VALID_URL, url).groups()
+
+ media = self._download_json(
+ base + '.json', video_id, 'Downloading video JSON')
+
+ title = media['name']
+
+ video = media['video']
+
+ relinker_info = self._extract_relinker_info(video['content_url'], video_id)
+ self._sort_formats(relinker_info['formats'])
+
+ thumbnails = []
+ for _, value in media.get('images', {}).items():
+ if value:
+ thumbnails.append({
+ 'url': urljoin(url, value),
+ })
+
+ date_published = media.get('date_published')
+ time_published = media.get('time_published')
+ if date_published and time_published:
+ date_published += ' ' + time_published
+
+ subtitles = self._extract_subtitles(url, video)
+
+ program_info = media.get('program_info') or {}
+ season = media.get('season')
+
+ info = {
+ 'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
+ 'display_id': video_id,
+ 'title': self._live_title(title) if relinker_info.get(
+ 'is_live') else title,
+ 'alt_title': strip_or_none(media.get('subtitle')),
+ 'description': media.get('description'),
+ 'uploader': strip_or_none(media.get('channel')),
+ 'creator': strip_or_none(media.get('editor') or None),
+ 'duration': parse_duration(video.get('duration')),
+ 'timestamp': unified_timestamp(date_published),
+ 'thumbnails': thumbnails,
+ 'series': program_info.get('name'),
+ 'season_number': int_or_none(season),
+ 'season': season if (season and not season.isdigit()) else None,
+ 'episode': media.get('episode_title'),
+ 'episode_number': int_or_none(media.get('episode')),
+ 'subtitles': subtitles,
+ }
+
+ info.update(relinker_info)
+ return info
+
+
+class RaiPlayLiveIE(RaiPlayIE):
+ _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
+ _TESTS = [{
+ 'url': 'http://www.raiplay.it/dirette/rainews24',
+ 'info_dict': {
+ 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
+ 'display_id': 'rainews24',
+ 'ext': 'mp4',
+ 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497',
+ 'uploader': 'Rai News 24',
+ 'creator': 'Rai News 24',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+
+class RaiPlayPlaylistIE(InfoExtractor):
+ _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))'
+ _TESTS = [{
+ 'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
+ 'info_dict': {
+ 'id': 'nondirloalmiocapo',
+ 'title': 'Non dirlo al mio capo',
+ 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
+ },
+ 'playlist_mincount': 12,
+ }]
+
+ def _real_extract(self, url):
+ base, playlist_id = re.match(self._VALID_URL, url).groups()
+
+ program = self._download_json(
+ base + '.json', playlist_id, 'Downloading program JSON')
+
+ entries = []
+ for b in (program.get('blocks') or []):
+ for s in (b.get('sets') or []):
+ s_id = s.get('id')
+ if not s_id:
+ continue
+ medias = self._download_json(
+ '%s/%s.json' % (base, s_id), s_id,
+ 'Downloading content set JSON', fatal=False)
+ if not medias:
+ continue
+ for m in (medias.get('items') or []):
+ path_id = m.get('path_id')
+ if not path_id:
+ continue
+ video_url = urljoin(url, path_id)
+ entries.append(self.url_result(
+ video_url, ie=RaiPlayIE.ie_key(),
+ video_id=RaiPlayIE._match_id(video_url)))
+
+ return self.playlist_result(
+ entries, playlist_id, program.get('name'),
+ try_get(program, lambda x: x['program_info']['description']))
+
+
+class RaiIE(RaiBaseIE):
+ _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
+ _TESTS = [{
+ # var uniquename = "ContentItem-..."
+ # data-id="ContentItem-..."
+ 'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
+ 'info_dict': {
+ 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
+ 'ext': 'mp4',
+ 'title': 'TG PRIMO TEMPO',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1758,
+ 'upload_date': '20140612',
+ },
+ 'skip': 'This content is available only in Italy',
+ }, {
+ # with ContentItem in many metas
+ 'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
+ 'info_dict': {
+ 'id': '1632c009-c843-4836-bb65-80c33084a64b',
+ 'ext': 'mp4',
+ 'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"',
+ 'description': 'I film in uscita questa settimana.',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'duration': 833,
+ 'upload_date': '20161103',
+ }
+ }, {
+ # with ContentItem in og:url
+ 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
+ 'md5': '6865dd00cf0bbf5772fdd89d59bd768a',
+ 'info_dict': {
+ 'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
+ 'ext': 'mp4',
+ 'title': 'TG1 ore 20:00 del 03/11/2016',
+ 'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2214,
+ 'upload_date': '20161103',
+ }
+ }, {
+ # initEdizione('ContentItem-...'
+ 'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
+ 'info_dict': {
+ 'id': 'c2187016-8484-4e3a-8ac8-35e475b07303',
+ 'ext': 'mp4',
+ 'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}',
+ 'duration': 2274,
+ 'upload_date': '20170401',
+ },
+ 'skip': 'Changes daily',
+ }, {
+ # HLS live stream with ContentItem in og:url
+ 'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
+ 'info_dict': {
+ 'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
+ 'ext': 'mp4',
+ 'title': 'La diretta di Rainews24',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # ContentItem in iframe (see #12652) and subtitle at 'subtitlesUrl' key
+ 'url': 'http://www.presadiretta.rai.it/dl/portali/site/puntata/ContentItem-3ed19d13-26c2-46ff-a551-b10828262f1b.html',
+ 'info_dict': {
+ 'id': '1ad6dc64-444a-42a4-9bea-e5419ad2f5fd',
+ 'ext': 'mp4',
+ 'title': 'Partiti acchiappavoti - Presa diretta del 13/09/2015',
+ 'description': 'md5:d291b03407ec505f95f27970c0b025f4',
+ 'upload_date': '20150913',
+ 'subtitles': {
+ 'it': 'count:2',
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # Direct MMS URL
+ 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html',
+ 'only_matching': True,
+ }]
+
+ def _extract_from_content_id(self, content_id, url):
+ media = self._download_json(
+ 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id,
+ content_id, 'Downloading video JSON')
+
+ title = media['name'].strip()
+
+ media_type = media['type']
+ if 'Audio' in media_type:
+ relinker_info = {
+ 'formats': [{
+ 'format_id': media.get('formatoAudio'),
+ 'url': media['audioUrl'],
+ 'ext': media.get('formatoAudio'),
+ }]
+ }
+ elif 'Video' in media_type:
+ relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
+ else:
+ raise ExtractorError('not a media file')
+
+ self._sort_formats(relinker_info['formats'])
+
+ thumbnails = []
+ for image_type in ('image', 'image_medium', 'image_300'):
+ thumbnail_url = media.get(image_type)
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': compat_urlparse.urljoin(url, thumbnail_url),
+ })
+
+ subtitles = self._extract_subtitles(url, media)
+
+ info = {
+ 'id': content_id,
+ 'title': title,
+ 'description': strip_or_none(media.get('desc')),
+ 'thumbnails': thumbnails,
+ 'uploader': media.get('author'),
+ 'upload_date': unified_strdate(media.get('date')),
+ 'duration': parse_duration(media.get('length')),
+ 'subtitles': subtitles,
+ }
+
+ info.update(relinker_info)
+
+ return info
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ content_item_id = None
+
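+        # the ContentItem id may be embedded in several places: meta tags,
+        # inline JS calls, data attributes or an iframe URL; try them in turn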
+ content_item_url = self._html_search_meta(
+ ('og:url', 'og:video', 'og:video:secure_url', 'twitter:url',
+ 'twitter:player', 'jsonlink'), webpage, default=None)
+ if content_item_url:
+ content_item_id = self._search_regex(
+ r'ContentItem-(%s)' % self._UUID_RE, content_item_url,
+ 'content item id', default=None)
+
+ if not content_item_id:
+ content_item_id = self._search_regex(
+ r'''(?x)
+ (?:
+ (?:initEdizione|drawMediaRaiTV)\(|
+ <(?:[^>]+\bdata-id|var\s+uniquename)=|
+ <iframe[^>]+\bsrc=
+ )
+ (["\'])
+ (?:(?!\1).)*\bContentItem-(?P<id>%s)
+ ''' % self._UUID_RE,
+ webpage, 'content item id', default=None, group='id')
+
+ content_item_ids = set()
+ if content_item_id:
+ content_item_ids.add(content_item_id)
+ if video_id not in content_item_ids:
+ content_item_ids.add(video_id)
+
+ for content_item_id in content_item_ids:
+ try:
+ return self._extract_from_content_id(content_item_id, url)
+ except GeoRestrictedError:
+ raise
+ except ExtractorError:
+ pass
+
+ relinker_url = self._proto_relative_url(self._search_regex(
+ r'''(?x)
+ (?:
+ var\s+videoURL|
+ mediaInfo\.mediaUri
+ )\s*=\s*
+ ([\'"])
+ (?P<url>
+ (?:https?:)?
+ //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
+ (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
+ ''',
+ webpage, 'relinker URL', group='url'))
+
+ relinker_info = self._extract_relinker_info(
+ urljoin(url, relinker_url), video_id)
+ self._sort_formats(relinker_info['formats'])
+
+ title = self._search_regex(
+ r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
+ webpage, 'title', group='title',
+ default=None) or self._og_search_title(webpage)
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ }
+
+ info.update(relinker_info)
+
+ return info
diff --git a/hypervideo_dl/extractor/raywenderlich.py b/hypervideo_dl/extractor/raywenderlich.py
new file mode 100644
index 0000000..5411ece
--- /dev/null
+++ b/hypervideo_dl/extractor/raywenderlich.py
@@ -0,0 +1,182 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+ try_get,
+ unescapeHTML,
+ unified_timestamp,
+ urljoin,
+)
+
+
+class RayWenderlichIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ videos\.raywenderlich\.com/courses|
+ (?:www\.)?raywenderlich\.com
+ )/
+ (?P<course_id>[^/]+)/lessons/(?P<id>\d+)
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
+ 'info_dict': {
+ 'id': '248377018',
+ 'ext': 'mp4',
+ 'title': 'Introduction',
+ 'description': 'md5:804d031b3efa9fcb49777d512d74f722',
+ 'timestamp': 1513906277,
+ 'upload_date': '20171222',
+ 'duration': 133,
+ 'uploader': 'Ray Wenderlich',
+ 'uploader_id': 'user3304672',
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ },
+ 'add_ie': [VimeoIE.ie_key()],
+ 'expected_warnings': ['HTTP Error 403: Forbidden'],
+ }, {
+ 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_video_id(data, lesson_id):
+ if not data:
+ return
+ groups = try_get(data, lambda x: x['groups'], list) or []
+ if not groups:
+ return
+ for group in groups:
+ if not isinstance(group, dict):
+ continue
+            # each group carries its own content list
+            contents = try_get(group, lambda x: x['contents'], list) or []
+ for content in contents:
+ if not isinstance(content, dict):
+ continue
+ ordinal = int_or_none(content.get('ordinal'))
+ if ordinal != lesson_id:
+ continue
+ video_id = content.get('identifier')
+ if video_id:
+ return compat_str(video_id)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ course_id, lesson_id = mobj.group('course_id', 'id')
+ display_id = '%s/%s' % (course_id, lesson_id)
+
+ webpage = self._download_webpage(url, display_id)
+
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:image', webpage, 'thumbnail')
+
+ if '>Subscribe to unlock' in webpage:
+ raise ExtractorError(
+ 'This content is only available for subscribers',
+ expected=True)
+
+ info = {
+ 'thumbnail': thumbnail,
+ }
+
+ vimeo_id = self._search_regex(
+ r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)
+
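+        # when the page does not expose the Vimeo id directly, resolve it
+        # through the videos API, authenticating with the page's CSRF token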
+ if not vimeo_id:
+ data = self._parse_json(
+ self._search_regex(
+ r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
+ 'data collection', default='{}', group='data'),
+ display_id, transform_source=unescapeHTML, fatal=False)
+ video_id = self._extract_video_id(
+ data, lesson_id) or self._search_regex(
+ r'/videos/(\d+)/', thumbnail, 'video id')
+ headers = {
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ }
+ csrf_token = self._html_search_meta(
+ 'csrf-token', webpage, 'csrf token', default=None)
+ if csrf_token:
+ headers['X-CSRF-Token'] = csrf_token
+ video = self._download_json(
+ 'https://videos.raywenderlich.com/api/v1/videos/%s.json'
+ % video_id, display_id, headers=headers)['video']
+ vimeo_id = video['clips'][0]['provider_id']
+ info.update({
+ '_type': 'url_transparent',
+ 'title': video.get('name'),
+ 'description': video.get('description') or video.get(
+ 'meta_description'),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': unified_timestamp(video.get('created_at')),
+ })
+
+ return merge_dicts(info, self.url_result(
+ VimeoIE._smuggle_referrer(
+ 'https://player.vimeo.com/video/%s' % vimeo_id, url),
+ ie=VimeoIE.ie_key(), video_id=vimeo_id))
+
+
+class RayWenderlichCourseIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ videos\.raywenderlich\.com/courses|
+ (?:www\.)?raywenderlich\.com
+ )/
+ (?P<id>[^/]+)
+ '''
+
+ _TEST = {
+ 'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
+ 'info_dict': {
+ 'title': 'Testing in iOS',
+ 'id': '3530-testing-in-ios',
+ },
+ 'params': {
+ 'noplaylist': False,
+ },
+ 'playlist_count': 29,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if RayWenderlichIE.suitable(url) else super(
+ RayWenderlichCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, course_id)
+
+ entries = []
+ lesson_urls = set()
+ for lesson_url in re.findall(
+ r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage):
+ if lesson_url in lesson_urls:
+ continue
+ lesson_urls.add(lesson_url)
+ entries.append(self.url_result(
+ urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))
+
+ title = self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:title', webpage, 'title', default=None)
+
+ return self.playlist_result(entries, course_id, title)
diff --git a/hypervideo_dl/extractor/rbmaradio.py b/hypervideo_dl/extractor/rbmaradio.py
new file mode 100644
index 0000000..ae7413f
--- /dev/null
+++ b/hypervideo_dl/extractor/rbmaradio.py
@@ -0,0 +1,74 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ int_or_none,
+ unified_timestamp,
+ update_url_query,
+)
+
+
+class RBMARadioIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:rbmaradio|redbullradio)\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011',
+ 'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
+ 'info_dict': {
+ 'id': 'ford-lopatin-live-at-primavera-sound-2011',
+ 'ext': 'mp3',
+ 'title': 'Main Stage - Ford & Lopatin at Primavera Sound',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2452,
+ 'timestamp': 1307103164,
+ 'upload_date': '20110603',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ show_id = mobj.group('show_id')
+ episode_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, episode_id)
+
+ episode = self._parse_json(
+ self._search_regex(
+ r'__INITIAL_STATE__\s*=\s*({.+?})\s*</script>',
+ webpage, 'json data'),
+ episode_id)['episodes'][show_id][episode_id]
+
+ title = episode['title']
+
+ show_title = episode.get('showTitle')
+ if show_title:
+ title = '%s - %s' % (show_title, title)
+
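+        # fixed-bitrate variants are requested via the 'cbr' query parameter;
+        # _check_formats() weeds out the ones the server does not serve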
+ formats = [{
+ 'url': update_url_query(episode['audioURL'], query={'cbr': abr}),
+ 'format_id': compat_str(abr),
+ 'abr': abr,
+ 'vcodec': 'none',
+ } for abr in (96, 128, 192, 256)]
+ self._check_formats(formats, episode_id)
+
+ description = clean_html(episode.get('longTeaser'))
+ thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape'))
+ duration = int_or_none(episode.get('duration'))
+ timestamp = unified_timestamp(episode.get('publishedAt'))
+
+ return {
+ 'id': episode_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/rds.py b/hypervideo_dl/extractor/rds.py
new file mode 100644
index 0000000..0c49785
--- /dev/null
+++ b/hypervideo_dl/extractor/rds.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+ js_to_json,
+)
+from ..compat import compat_str
+
+
+class RDSIE(InfoExtractor):
+ IE_DESC = 'RDS.ca'
+ _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'
+
+ _TESTS = [{
+ # has two 9c9media ContentPackages, the web player selects the first ContentPackage
+ 'url': 'https://www.rds.ca/videos/Hockey/NationalHockeyLeague/teams/9/forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande-3.1377606',
+ 'info_dict': {
+ 'id': '2083309',
+ 'display_id': 'forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande',
+ 'ext': 'flv',
+ 'title': 'Forum du 5 à 7 : Kotkaniemi de retour de Finlande',
+ 'description': 'md5:83fa38ecc4a79b19e433433254077f25',
+ 'timestamp': 1606129030,
+ 'upload_date': '20201123',
+ 'duration': 773.039,
+ }
+ }, {
+ 'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json)
+ video_id = compat_str(item['id'])
+ title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta(
+ 'title', webpage, 'title', fatal=True)
+ description = self._og_search_description(webpage) or self._html_search_meta(
+ 'description', webpage, 'description')
+ thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex(
+ [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
+ r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
+ webpage, 'thumbnail', fatal=False)
+ timestamp = parse_iso8601(self._search_regex(
+ r'<span[^>]+itemprop="uploadDate"[^>]+content="([^"]+)"',
+ webpage, 'upload date', fatal=False))
+ duration = parse_duration(self._search_regex(
+ r'<span[^>]+itemprop="duration"[^>]+content="([^"]+)"',
+ webpage, 'duration', fatal=False))
+ age_limit = self._family_friendly_search(webpage)
+
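+        # playback is delegated to the 9c9media extractor; everything above
+        # merely enriches the metadata passed along via url_transparent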
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': '9c9media:rds_web:%s' % video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'ie_key': 'NineCNineMedia',
+ }
diff --git a/hypervideo_dl/extractor/redbulltv.py b/hypervideo_dl/extractor/redbulltv.py
new file mode 100644
index 0000000..6d000b3
--- /dev/null
+++ b/hypervideo_dl/extractor/redbulltv.py
@@ -0,0 +1,235 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ float_or_none,
+ ExtractorError,
+)
+
+
+class RedBullTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live|(?:film|episode)s)/(?P<id>AP-\w+)'
+ _TESTS = [{
+ # film
+ 'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
+ 'md5': 'fb0445b98aa4394e504b413d98031d1f',
+ 'info_dict': {
+ 'id': 'AP-1Q6XCDTAN1W11',
+ 'ext': 'mp4',
+ 'title': 'ABC of... WRC - ABC of... S1E6',
+ 'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31',
+ 'duration': 1582.04,
+ },
+ }, {
+ # episode
+ 'url': 'https://www.redbull.tv/video/AP-1PMHKJFCW1W11',
+ 'info_dict': {
+ 'id': 'AP-1PMHKJFCW1W11',
+ 'ext': 'mp4',
+ 'title': 'Grime - Hashtags S2E4',
+ 'description': 'md5:5546aa612958c08a98faaad4abce484d',
+ 'duration': 904,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/us-en/videos/AP-1YM9QCYE52111',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/films/AP-1ZSMAW8FH2111',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/episodes/AP-1TQWK7XE11W11',
+ 'only_matching': True,
+ }]
+
+ def extract_info(self, video_id):
+ session = self._download_json(
+ 'https://api.redbull.tv/v3/session', video_id,
+ note='Downloading access token', query={
+ 'category': 'personal_computer',
+ 'os_family': 'http',
+ })
+ if session.get('code') == 'error':
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, session['message']))
+ token = session['token']
+
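+        # the session token authorizes the product lookup and is also embedded
+        # in the DMS playlist URL below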
+ try:
+ video = self._download_json(
+ 'https://api.redbull.tv/v3/products/' + video_id,
+ video_id, note='Downloading video information',
+ headers={'Authorization': token}
+ )
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+ error_message = self._parse_json(
+ e.cause.read().decode(), video_id)['error']
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, error_message), expected=True)
+ raise
+
+ title = video['title'].strip()
+
+ formats = self._extract_m3u8_formats(
+ 'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token),
+ video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for resource in video.get('resources', []):
+ if resource.startswith('closed_caption_'):
+ splitted_resource = resource.split('_')
+ if splitted_resource[2]:
+ subtitles.setdefault('en', []).append({
+ 'url': 'https://resources.redbull.tv/%s/%s' % (video_id, resource),
+ 'ext': splitted_resource[2],
+ })
+
+ subheading = video.get('subheading')
+ if subheading:
+ title += ' - %s' % subheading
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('long_description') or video.get(
+ 'short_description'),
+ 'duration': float_or_none(video.get('duration'), scale=1000),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.extract_info(video_id)
+
+
+class RedBullEmbedIE(RedBullTVIE):
+ _VALID_URL = r'https?://(?:www\.)?redbull\.com/embed/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[a-z]{2}-[A-Z]{2,3})'
+ _TESTS = [{
+ # HLS manifest accessible only using assetId
+ 'url': 'https://www.redbull.com/embed/rrn:content:episode-videos:f3021f4f-3ed4-51ac-915a-11987126e405:en-INT',
+ 'only_matching': True,
+ }]
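+    # GraphQL inline-fragment template, formatted once per possible resource
+    # type (e.g. '... on LiveVideo { videoEssence { attributes } }') so the
+    # query below resolves the asset ID whichever concrete type the rrn ID is.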
+    _VIDEO_ESSENCE_TMPL = '''... on %s {
+ videoEssence {
+ attributes
+ }
+ }'''
+
+ def _real_extract(self, url):
+ rrn_id = self._match_id(url)
+ asset_id = self._download_json(
+ 'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql',
+ rrn_id, headers={
+ 'Accept': 'application/json',
+ 'API-KEY': 'e90a1ff11335423998b100c929ecc866',
+ }, query={
+ 'query': '''{
+ resource(id: "%s", enforceGeoBlocking: false) {
+ %s
+ %s
+ }
+}''' % (rrn_id, self._VIDEO_ESSENCE_TMPL % 'LiveVideo', self._VIDEO_ESSENCE_TMPL % 'VideoResource'),
+ })['data']['resource']['videoEssence']['attributes']['assetId']
+ return self.extract_info(asset_id)
+
+
+class RedBullTVRrnContentIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/tv/(?:video|live|film)/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/tv/film/rrn:content:films:d1f4d00e-4c04-5d19-b510-a805ffa2ab83/follow-me',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ region, lang, rrn_id = re.search(self._VALID_URL, url).groups()
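+        # The embed endpoint expects the locale-qualified form of the rrn ID
+        # (e.g. 'rrn:content:live-videos:<uuid>:en-INT' for an int-en URL).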
+ rrn_id += ':%s-%s' % (lang, region.upper())
+ return self.url_result(
+ 'https://www.redbull.com/embed/' + rrn_id,
+ RedBullEmbedIE.ie_key(), rrn_id)
+
+
+class RedBullIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/(?P<type>(?:episode|film|(?:(?:recap|trailer)-)?video)s|live)/(?!AP-|rrn:content:)(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.redbull.com/int-en/episodes/grime-hashtags-s02-e04',
+ 'md5': 'db8271a7200d40053a1809ed0dd574ff',
+ 'info_dict': {
+ 'id': 'AA-1MT8DQWA91W14',
+ 'ext': 'mp4',
+ 'title': 'Grime - Hashtags S2E4',
+ 'description': 'md5:5546aa612958c08a98faaad4abce484d',
+ },
+ }, {
+ 'url': 'https://www.redbull.com/int-en/films/kilimanjaro-mountain-of-greatness',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/recap-videos/uci-mountain-bike-world-cup-2017-mens-xco-finals-from-vallnord',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/trailer-videos/kings-of-content',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/videos/tnts-style-red-bull-dance-your-style-s1-e12',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/live/mens-dh-finals-fort-william',
+ 'only_matching': True,
+ }, {
+        # only available on the int-en website, so a fallback is needed for the API
+ # https://www.redbull.com/v3/api/graphql/v1/v3/query/en-GB>en-INT?filter[uriSlug]=fia-wrc-saturday-recap-estonia&rb3Schema=v1:hero
+ 'url': 'https://www.redbull.com/gb-en/live/fia-wrc-saturday-recap-estonia',
+ 'only_matching': True,
+ }]
+ _INT_FALLBACK_LIST = ['de', 'en', 'es', 'fr']
+    _LAT_FALLBACK_LIST = ['ar', 'bo', 'car', 'cl', 'co', 'mx', 'pe']
+
+ def _real_extract(self, url):
+ region, lang, filter_type, display_id = re.search(self._VALID_URL, url).groups()
+ if filter_type == 'episodes':
+ filter_type = 'episode-videos'
+ elif filter_type == 'live':
+ filter_type = 'live-videos'
+
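+        # Build the locale fallback chain understood by the API, e.g.
+        # 'en-GB>en-INT' for a gb-en URL: content missing from the regional
+        # catalogue is looked up in the LAT or INT catalogue instead.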
+ regions = [region.upper()]
+ if region != 'int':
+            if region in self._LAT_FALLBACK_LIST:
+ regions.append('LAT')
+ if lang in self._INT_FALLBACK_LIST:
+ regions.append('INT')
+ locale = '>'.join(['%s-%s' % (lang, reg) for reg in regions])
+
+ rrn_id = self._download_json(
+ 'https://www.redbull.com/v3/api/graphql/v1/v3/query/' + locale,
+ display_id, query={
+ 'filter[type]': filter_type,
+ 'filter[uriSlug]': display_id,
+ 'rb3Schema': 'v1:hero',
+ })['data']['id']
+
+ return self.url_result(
+ 'https://www.redbull.com/embed/' + rrn_id,
+ RedBullEmbedIE.ie_key(), rrn_id)
diff --git a/hypervideo_dl/extractor/reddit.py b/hypervideo_dl/extractor/reddit.py
new file mode 100644
index 0000000..222fa01
--- /dev/null
+++ b/hypervideo_dl/extractor/reddit.py
@@ -0,0 +1,161 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ try_get,
+ unescapeHTML,
+ url_or_none,
+)
+
+
+class RedditIE(InfoExtractor):
+ _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
+ _TEST = {
+ # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
+ 'url': 'https://v.redd.it/zv89llsvexdz',
+ 'md5': '0a070c53eba7ec4534d95a5a1259e253',
+ 'info_dict': {
+ 'id': 'zv89llsvexdz',
+ 'ext': 'mp4',
+ 'title': 'zv89llsvexdz',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ formats = self._extract_m3u8_formats(
+ 'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id,
+ 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+
+ formats.extend(self._extract_mpd_formats(
+ 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
+ mpd_id='dash', fatal=False))
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'formats': formats,
+ }
+
+
+class RedditRIE(InfoExtractor):
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
+ _TESTS = [{
+ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
+ 'info_dict': {
+ 'id': 'zv89llsvexdz',
+ 'ext': 'mp4',
+ 'title': 'That small heart attack.',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'thumbnails': 'count:4',
+ 'timestamp': 1501941939,
+ 'upload_date': '20170805',
+ 'uploader': 'Antw87',
+ 'duration': 12,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
+ 'only_matching': True,
+ }, {
+ # imgur
+ 'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
+ 'only_matching': True,
+ }, {
+ # imgur @ old reddit
+ 'url': 'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
+ 'only_matching': True,
+ }, {
+ # streamable
+ 'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
+ 'only_matching': True,
+ }, {
+ # youtube
+ 'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
+ 'only_matching': True,
+ }, {
+ # reddit video @ nm reddit
+ 'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+        url, video_id = mobj.group('url', 'id')
+
+ data = self._download_json(
+ url + '/.json', video_id)[0]['data']['children'][0]['data']
+
+ video_url = data['url']
+
+ # Avoid recursing into the same reddit URL
+ if 'reddit.com/' in video_url and '/%s/' % video_id in video_url:
+ raise ExtractorError('No media found', expected=True)
+
+ over_18 = data.get('over_18')
+ if over_18 is True:
+ age_limit = 18
+ elif over_18 is False:
+ age_limit = 0
+ else:
+ age_limit = None
+
+ thumbnails = []
+
+ def add_thumbnail(src):
+ if not isinstance(src, dict):
+ return
+ thumbnail_url = url_or_none(src.get('url'))
+ if not thumbnail_url:
+ return
+ thumbnails.append({
+ 'url': unescapeHTML(thumbnail_url),
+ 'width': int_or_none(src.get('width')),
+ 'height': int_or_none(src.get('height')),
+ })
+
+ for image in try_get(data, lambda x: x['preview']['images']) or []:
+ if not isinstance(image, dict):
+ continue
+ add_thumbnail(image.get('source'))
+ resolutions = image.get('resolutions')
+ if isinstance(resolutions, list):
+ for resolution in resolutions:
+ add_thumbnail(resolution)
+
+ return {
+ '_type': 'url_transparent',
+ 'url': video_url,
+ 'title': data.get('title'),
+ 'thumbnails': thumbnails,
+ 'timestamp': float_or_none(data.get('created_utc')),
+ 'uploader': data.get('author'),
+ 'duration': int_or_none(try_get(
+ data,
+ (lambda x: x['media']['reddit_video']['duration'],
+ lambda x: x['secure_media']['reddit_video']['duration']))),
+ 'like_count': int_or_none(data.get('ups')),
+ 'dislike_count': int_or_none(data.get('downs')),
+ 'comment_count': int_or_none(data.get('num_comments')),
+ 'age_limit': age_limit,
+ }
diff --git a/hypervideo_dl/extractor/redtube.py b/hypervideo_dl/extractor/redtube.py
new file mode 100644
index 0000000..a1ca791
--- /dev/null
+++ b/hypervideo_dl/extractor/redtube.py
@@ -0,0 +1,136 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+ str_to_int,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class RedTubeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.redtube.com/66418',
+ 'md5': 'fc08071233725f26b8f014dba9590005',
+ 'info_dict': {
+ 'id': '66418',
+ 'ext': 'mp4',
+ 'title': 'Sucked on a toilet',
+ 'upload_date': '20110811',
+ 'duration': 596,
+ 'view_count': int,
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://it.redtube.com/66418',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'http://www.redtube.com/%s' % video_id, video_id)
+
+ ERRORS = (
+ (('video-deleted-info', '>This video has been removed'), 'has been removed'),
+ (('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'),
+ )
+
+ for patterns, message in ERRORS:
+ if any(p in webpage for p in patterns):
+ raise ExtractorError(
+ 'Video %s %s' % (video_id, message), expected=True)
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ if not info.get('title'):
+ info['title'] = self._html_search_regex(
+ (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
+ r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
+ webpage, 'title', group='title',
+ default=None) or self._og_search_title(webpage)
+
+ formats = []
+ sources = self._parse_json(
+ self._search_regex(
+ r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'),
+ video_id, fatal=False)
+ if sources and isinstance(sources, dict):
+ for format_id, format_url in sources.items():
+ if format_url:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'height': int_or_none(format_id),
+ })
+ medias = self._parse_json(
+ self._search_regex(
+ r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage,
+                'media definitions', default='[]'),
+ video_id, fatal=False)
+ if medias and isinstance(medias, list):
+ for media in medias:
+ format_url = url_or_none(media.get('videoUrl'))
+ if not format_url:
+ continue
+ if media.get('format') == 'hls' or determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ continue
+ format_id = media.get('quality')
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'height': int_or_none(format_id),
+ })
+ if not formats:
+ video_url = self._html_search_regex(
+ r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
+ formats.append({'url': video_url})
+ self._sort_formats(formats)
+
+ thumbnail = self._og_search_thumbnail(webpage)
+ upload_date = unified_strdate(self._search_regex(
+ r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<',
+ webpage, 'upload date', default=None))
+ duration = int_or_none(self._og_search_property(
+ 'video:duration', webpage, default=None) or self._search_regex(
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
+ view_count = str_to_int(self._search_regex(
+ (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
+ r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)',
+ r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'),
+ webpage, 'view count', default=None))
+
+ # No self-labeling, but they describe themselves as
+ # "Home of Videos Porno"
+ age_limit = 18
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'ext': 'mp4',
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ })
diff --git a/hypervideo_dl/extractor/regiotv.py b/hypervideo_dl/extractor/regiotv.py
new file mode 100644
index 0000000..e250a52
--- /dev/null
+++ b/hypervideo_dl/extractor/regiotv.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+ sanitized_Request,
+ xpath_text,
+ xpath_with_ns,
+)
+
+
+class RegioTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?regio-tv\.de/video/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.regio-tv.de/video/395808.html',
+ 'info_dict': {
+ 'id': '395808',
+ 'ext': 'mp4',
+ 'title': 'Wir in Ludwigsburg',
+ 'description': 'Mit unseren zuckersüßen Adventskindern, außerdem besuchen wir die Abendsterne!',
+ }
+ }, {
+ 'url': 'http://www.regio-tv.de/video/395808',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ key = self._search_regex(
+ r'key\s*:\s*(["\'])(?P<key>.+?)\1', webpage, 'key', group='key')
+ title = self._og_search_title(webpage)
+
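+        # The v.telvi.de backend expects a SOAP envelope wrapping the page key
+        # in a GetHTML5VideoData call (SOAP_TEMPLATE below); the response is
+        # XML carrying <video> (stream URL) and <image> (thumbnail) elements.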
+ SOAP_TEMPLATE = '<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><{0} xmlns="http://v.telvi.de/"><key xsi:type="xsd:string">{1}</key></{0}></soap:Body></soap:Envelope>'
+
+ request = sanitized_Request(
+ 'http://v.telvi.de/',
+ SOAP_TEMPLATE.format('GetHTML5VideoData', key).encode('utf-8'))
+ video_data = self._download_xml(request, video_id, 'Downloading video XML')
+
+ NS_MAP = {
+ 'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
+ 'soap': 'http://schemas.xmlsoap.org/soap/envelope/',
+ }
+
+ video_url = xpath_text(
+ video_data, xpath_with_ns('.//video', NS_MAP), 'video url', fatal=True)
+ thumbnail = xpath_text(
+ video_data, xpath_with_ns('.//image', NS_MAP), 'thumbnail')
+ description = self._og_search_description(
+ webpage) or self._html_search_meta('description', webpage)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/rentv.py b/hypervideo_dl/extractor/rentv.py
new file mode 100644
index 0000000..7c8909d
--- /dev/null
+++ b/hypervideo_dl/extractor/rentv.py
@@ -0,0 +1,106 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ url_or_none,
+)
+
+
+class RENTVIE(InfoExtractor):
+ _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://ren.tv/video/epizod/118577',
+ 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
+ 'info_dict': {
+ 'id': '118577',
+ 'ext': 'mp4',
+ 'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"',
+ 'timestamp': 1472230800,
+ 'upload_date': '20160826',
+ }
+ }, {
+ 'url': 'http://ren.tv/player/118577',
+ 'only_matching': True,
+ }, {
+ 'url': 'rentv:118577',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id)
+ config = self._parse_json(self._search_regex(
+ r'config\s*=\s*({.+})\s*;', webpage, 'config'), video_id)
+ title = config['title']
+ formats = []
+ for video in config['src']:
+ src = url_or_none(video.get('src'))
+ if not src:
+ continue
+ ext = determine_ext(src)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ })
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': config.get('description'),
+ 'thumbnail': config.get('image'),
+ 'duration': int_or_none(config.get('duration')),
+ 'timestamp': int_or_none(config.get('date')),
+ 'formats': formats,
+ }
+
+
+class RENTVArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v',
+ 'md5': 'ebd63c4680b167693745ab91343df1d6',
+ 'info_dict': {
+ 'id': '136472',
+ 'ext': 'mp4',
+ 'title': 'Видео: микроавтобус, попавший в ДТП с грузовиками в Подмосковье, превратился в груду металла',
+ 'description': 'Жертвами столкновения двух фур и микроавтобуса, по последним данным, стали семь человек.',
+ }
+ }, {
+ # TODO: invalid m3u8
+ 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
+ 'info_dict': {
+ 'id': 'playlist',
+ 'ext': 'mp4',
+ 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
+ 'uploader': 'ren.tv',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'skip': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ drupal_settings = self._parse_json(self._search_regex(
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+ webpage, 'drupal settings'), display_id)
+
+ entries = []
+ for config_profile in drupal_settings.get('ren_jwplayer', {}).values():
+ media_id = config_profile.get('mediaid')
+ if not media_id:
+ continue
+ media_id = compat_str(media_id)
+ entries.append(self.url_result('rentv:' + media_id, 'RENTV', media_id))
+ return self.playlist_result(entries, display_id)
diff --git a/hypervideo_dl/extractor/restudy.py b/hypervideo_dl/extractor/restudy.py
new file mode 100644
index 0000000..d47fb45
--- /dev/null
+++ b/hypervideo_dl/extractor/restudy.py
@@ -0,0 +1,44 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class RestudyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|portal)\.)?restudy\.dk/video/[^/]+/id/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.restudy.dk/video/play/id/1637',
+ 'info_dict': {
+ 'id': '1637',
+ 'ext': 'flv',
+ 'title': 'Leiden-frosteffekt',
+ 'description': 'Denne video er et eksperiment med flydende kvælstof.',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://portal.restudy.dk/video/leiden-frosteffekt/id/1637',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(webpage).strip()
+ description = self._og_search_description(webpage).strip()
+
+ formats = self._extract_smil_formats(
+ 'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id,
+ video_id)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/reuters.py b/hypervideo_dl/extractor/reuters.py
new file mode 100644
index 0000000..9dc482d
--- /dev/null
+++ b/hypervideo_dl/extractor/reuters.py
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ int_or_none,
+ unescapeHTML,
+)
+
+
+class ReutersIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562',
+ 'md5': '8015113643a0b12838f160b0b81cc2ee',
+ 'info_dict': {
+ 'id': '368575562',
+ 'ext': 'mp4',
+ 'title': 'San Francisco police chief resigns',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id)
+ video_data = js_to_json(self._search_regex(
+ r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);',
+ webpage, 'video data'))
+
+ def get_json_value(key, fatal=False):
+ return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)
+
+ title = unescapeHTML(get_json_value('title', fatal=True))
+ mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups()
+
+ mas_data = self._download_json(
+ 'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid),
+ video_id, transform_source=js_to_json)
+ formats = []
+ for f in mas_data:
+ f_url = f.get('url')
+ if not f_url:
+ continue
+ method = f.get('method')
+ if method == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ else:
+ container = f.get('container')
+ ext = '3gp' if method == 'mobile' else container
+ formats.append({
+ 'format_id': ext,
+ 'url': f_url,
+ 'ext': ext,
+ 'container': container if method != 'mobile' else None,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': get_json_value('thumb'),
+ 'duration': int_or_none(get_json_value('seconds')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/reverbnation.py b/hypervideo_dl/extractor/reverbnation.py
new file mode 100644
index 0000000..4cb99c2
--- /dev/null
+++ b/hypervideo_dl/extractor/reverbnation.py
@@ -0,0 +1,53 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ qualities,
+ str_or_none,
+)
+
+
+class ReverbNationIE(InfoExtractor):
+ _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
+ _TESTS = [{
+ 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
+ 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
+ 'info_dict': {
+ 'id': '16965047',
+ 'ext': 'mp3',
+ 'title': 'MONA LISA',
+ 'uploader': 'ALKILADOS',
+ 'uploader_id': '216429',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ song_id = self._match_id(url)
+
+ api_res = self._download_json(
+ 'https://api.reverbnation.com/song/%s' % song_id,
+ song_id,
+ note='Downloading information of song %s' % song_id
+ )
+
+ THUMBNAILS = ('thumbnail', 'image')
+ quality = qualities(THUMBNAILS)
+ thumbnails = []
+ for thumb_key in THUMBNAILS:
+ if api_res.get(thumb_key):
+ thumbnails.append({
+ 'url': api_res[thumb_key],
+ 'preference': quality(thumb_key)
+ })
+
+ return {
+ 'id': song_id,
+ 'title': api_res['name'],
+ 'url': api_res['url'],
+ 'uploader': api_res.get('artist', {}).get('name'),
+ 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
+ 'thumbnails': thumbnails,
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ }
diff --git a/hypervideo_dl/extractor/rice.py b/hypervideo_dl/extractor/rice.py
new file mode 100644
index 0000000..f855719
--- /dev/null
+++ b/hypervideo_dl/extractor/rice.py
@@ -0,0 +1,116 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import (
+ xpath_text,
+ xpath_element,
+ int_or_none,
+ parse_iso8601,
+ ExtractorError,
+)
+
+
+class RICEIE(InfoExtractor):
+ _VALID_URL = r'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)'
+ _TEST = {
+ 'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw',
+ 'md5': '9b83b4a2eead4912dc3b7fac7c449b6a',
+ 'info_dict': {
+ 'id': 'YEWIvbhb40aqdjMD1ALSqw',
+ 'ext': 'mp4',
+ 'title': 'Active Learning in Archeology',
+ 'upload_date': '20140616',
+ 'timestamp': 1402926346,
+ }
+ }
+ _NS = 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config'
+
+ def _real_extract(self, url):
+ qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
+ if not qs.get('PortalID') or not qs.get('DestinationID') or not qs.get('ContentID'):
+ raise ExtractorError('Invalid URL', expected=True)
+
+ portal_id = qs['PortalID'][0]
+ playlist_id = qs['DestinationID'][0]
+ content_id = qs['ContentID'][0]
+
+ content_data = self._download_xml('https://mediahub.rice.edu/api/portal/GetContentTitle', content_id, query={
+ 'portalId': portal_id,
+ 'playlistId': playlist_id,
+ 'contentId': content_id
+ })
+ metadata = xpath_element(content_data, './/metaData', fatal=True)
+ title = xpath_text(metadata, 'primaryTitle', fatal=True)
+ encodings = xpath_element(content_data, './/encodings', fatal=True)
+ player_data = self._download_xml('https://mediahub.rice.edu/api/player/GetPlayerConfig', content_id, query={
+ 'temporaryLinkId': xpath_text(encodings, 'temporaryLinkId', fatal=True),
+ 'contentId': content_id,
+ })
+
+ common_fmt = {}
+ dimensions = xpath_text(encodings, 'dimensions')
+ if dimensions:
+ wh = dimensions.split('x')
+ if len(wh) == 2:
+ common_fmt.update({
+ 'width': int_or_none(wh[0]),
+ 'height': int_or_none(wh[1]),
+ })
+
+ formats = []
+ rtsp_path = xpath_text(player_data, self._xpath_ns('RtspPath', self._NS))
+ if rtsp_path:
+ fmt = {
+ 'url': rtsp_path,
+ 'format_id': 'rtsp',
+ }
+ fmt.update(common_fmt)
+ formats.append(fmt)
+ for source in player_data.findall(self._xpath_ns('.//Source', self._NS)):
+ video_url = xpath_text(source, self._xpath_ns('File', self._NS))
+ if not video_url:
+ continue
+ if '.m3u8' in video_url:
+ formats.extend(self._extract_m3u8_formats(video_url, content_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ else:
+ fmt = {
+ 'url': video_url,
+ 'format_id': video_url.split(':')[0],
+ }
+ fmt.update(common_fmt)
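+                # Split e.g. 'rtmp://host/app/mp4:path' (illustrative URL)
+                # into connection URL, app and play path for RTMP downloads.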
+ rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url)
+ if rtmp:
+ fmt.update({
+ 'url': rtmp.group('url'),
+ 'play_path': rtmp.group('playpath'),
+ 'app': rtmp.group('app'),
+ 'ext': 'flv',
+ })
+ formats.append(fmt)
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for content_asset in content_data.findall('.//contentAssets'):
+ asset_type = xpath_text(content_asset, 'type')
+ if asset_type == 'image':
+ image_url = xpath_text(content_asset, 'httpPath')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'id': xpath_text(content_asset, 'ID'),
+ 'url': image_url,
+ })
+
+ return {
+ 'id': content_id,
+ 'title': title,
+ 'description': xpath_text(metadata, 'abstract'),
+ 'duration': int_or_none(xpath_text(metadata, 'duration')),
+ 'timestamp': parse_iso8601(xpath_text(metadata, 'dateUpdated')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/rmcdecouverte.py b/hypervideo_dl/extractor/rmcdecouverte.py
new file mode 100644
index 0000000..c3623ed
--- /dev/null
+++ b/hypervideo_dl/extractor/rmcdecouverte.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveLegacyIE
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+from ..utils import smuggle_url
+
+
+class RMCDecouverteIE(InfoExtractor):
+ _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:(?:[^/]+/)*program_(?P<id>\d+)|(?P<live_id>mediaplayer-direct))'
+
+ _TESTS = [{
+ 'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/',
+ 'info_dict': {
+ 'id': '5983675500001',
+ 'ext': 'mp4',
+ 'title': 'CORVETTE',
+ 'description': 'md5:c1e8295521e45ffebf635d6a7658f506',
+ 'uploader_id': '1969646226001',
+ 'upload_date': '20181226',
+ 'timestamp': 1545861635,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'only available for a week',
+ }, {
+ # live, geo restricted, bypassable
+ 'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('id') or mobj.group('live_id')
+ webpage = self._download_webpage(url, display_id)
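+        # Prefer a legacy Brightcove embed, whose query string carries the
+        # @videoPlayer ID; newer pages expose the ID via data-video-id instead.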
+ brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
+ if brightcove_legacy_url:
+ brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
+ brightcove_legacy_url).query)['@videoPlayer'][0]
+ else:
+ brightcove_id = self._search_regex(
+ r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
+ return self.url_result(
+ smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ {'geo_countries': ['FR']}),
+ 'BrightcoveNew', brightcove_id)
diff --git a/hypervideo_dl/extractor/ro220.py b/hypervideo_dl/extractor/ro220.py
new file mode 100644
index 0000000..69934ef
--- /dev/null
+++ b/hypervideo_dl/extractor/ro220.py
@@ -0,0 +1,43 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class Ro220IE(InfoExtractor):
+ IE_NAME = '220.ro'
+ _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/',
+ 'md5': '03af18b73a07b4088753930db7a34add',
+ 'info_dict': {
+ 'id': 'LYV6doKo7f',
+ 'ext': 'mp4',
+ 'title': 'Luati-le Banii sez 4 ep 1',
+ 'description': r're:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ url = compat_urllib_parse_unquote(self._search_regex(
+ r'(?s)clip\s*:\s*{.*?url\s*:\s*\'([^\']+)\'', webpage, 'url'))
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ formats = [{
+ 'format_id': 'sd',
+ 'url': url,
+ 'ext': 'mp4',
+ }]
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/rockstargames.py b/hypervideo_dl/extractor/rockstargames.py
new file mode 100644
index 0000000..cd6904b
--- /dev/null
+++ b/hypervideo_dl/extractor/rockstargames.py
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class RockstarGamesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos(?:/video/|#?/?\?.*\bvideo=)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.rockstargames.com/videos/video/11544/',
+ 'md5': '03b5caa6e357a4bd50e3143fc03e5733',
+ 'info_dict': {
+ 'id': '11544',
+ 'ext': 'mp4',
+ 'title': 'Further Adventures in Finance and Felony Trailer',
+ 'description': 'md5:6d31f55f30cb101b5476c4a379e324a3',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1464876000,
+ 'upload_date': '20160602',
+ }
+ }, {
+ 'url': 'http://www.rockstargames.com/videos#/?video=48',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://www.rockstargames.com/videoplayer/videos/get-video.json',
+ video_id, query={
+ 'id': video_id,
+ 'locale': 'en_us',
+ })['video']
+
+ title = video['title']
+
+ formats = []
+        # Use a distinct loop variable so the outer `video` metadata dict is
+        # not shadowed; it is still needed for the fallback and fields below.
+        for video_file in video['files_processed']['video/mp4']:
+            if not video_file.get('src'):
+                continue
+            resolution = video_file.get('resolution')
+            height = int_or_none(self._search_regex(
+                r'^(\d+)[pP]$', resolution or '', 'height', default=None))
+            formats.append({
+                'url': self._proto_relative_url(video_file['src']),
+                'format_id': resolution,
+                'height': height,
+            })
+
+ if not formats:
+ youtube_id = video.get('youtube_id')
+ if youtube_id:
+ return self.url_result(youtube_id, 'Youtube')
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': self._proto_relative_url(video.get('screencap')),
+ 'timestamp': parse_iso8601(video.get('created')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/roosterteeth.py b/hypervideo_dl/extractor/roosterteeth.py
new file mode 100644
index 0000000..8883639
--- /dev/null
+++ b/hypervideo_dl/extractor/roosterteeth.py
@@ -0,0 +1,137 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ urlencode_postdata,
+)
+
+
+class RoosterTeethIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
+ _NETRC_MACHINE = 'roosterteeth'
+ _TESTS = [{
+ 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
+ 'md5': 'e2bd7764732d785ef797700a2489f212',
+ 'info_dict': {
+ 'id': '9156',
+ 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
+ 'ext': 'mp4',
+ 'title': 'Million Dollars, But... The Game Announcement',
+ 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'series': 'Million Dollars, But...',
+ 'episode': 'Million Dollars, But... The Game Announcement',
+ },
+ }, {
+ 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better',
+ 'only_matching': True,
+ }, {
+ # only available for FIRST members
+ 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
+ 'only_matching': True,
+ }]
+ _EPISODE_BASE_URL = 'https://svod-be.roosterteeth.com/api/v1/episodes/'
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
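+        # OAuth2 resource-owner-password-credentials grant against
+        # auth.roosterteeth.com; a failed login only warns, as non-FIRST
+        # content is viewable without an account.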
+ try:
+ self._download_json(
+ 'https://auth.roosterteeth.com/oauth/token',
+ None, 'Logging in', data=urlencode_postdata({
+ 'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5',
+ 'grant_type': 'password',
+ 'username': username,
+ 'password': password,
+ }))
+ except ExtractorError as e:
+ msg = 'Unable to login'
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ resp = self._parse_json(e.cause.read().decode(), None, fatal=False)
+ if resp:
+ error = resp.get('extra_info') or resp.get('error_description') or resp.get('error')
+ if error:
+ msg += ': ' + error
+ self.report_warning(msg)
+
+ def _real_initialize(self):
+ if self._get_cookies(self._EPISODE_BASE_URL).get('rt_access_token'):
+ return
+ self._login()
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ api_episode_url = self._EPISODE_BASE_URL + display_id
+
+ try:
+ m3u8_url = self._download_json(
+ api_episode_url + '/videos', display_id,
+ 'Downloading video JSON metadata')['data'][0]['attributes']['url']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ if self._parse_json(e.cause.read().decode(), display_id).get('access') is False:
+ self.raise_login_required(
+ '%s is only available for FIRST members' % display_id)
+ raise
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ episode = self._download_json(
+ api_episode_url, display_id,
+ 'Downloading episode JSON metadata')['data'][0]
+ attributes = episode['attributes']
+ title = attributes.get('title') or attributes['display_title']
+ video_id = compat_str(episode['id'])
+
+ thumbnails = []
+ for image in episode.get('included', {}).get('images', []):
+ if image.get('type') == 'episode_image':
+ img_attributes = image.get('attributes') or {}
+ for k in ('thumb', 'small', 'medium', 'large'):
+ img_url = img_attributes.get(k)
+ if img_url:
+ thumbnails.append({
+ 'id': k,
+ 'url': img_url,
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': attributes.get('description') or attributes.get('caption'),
+ 'thumbnails': thumbnails,
+ 'series': attributes.get('show_title'),
+ 'season_number': int_or_none(attributes.get('season_number')),
+ 'season_id': attributes.get('season_id'),
+ 'episode': title,
+ 'episode_number': int_or_none(attributes.get('number')),
+ 'episode_id': str_or_none(episode.get('uuid')),
+ 'formats': formats,
+ 'channel_id': attributes.get('channel_id'),
+ 'duration': int_or_none(attributes.get('length')),
+ }
diff --git a/hypervideo_dl/extractor/rottentomatoes.py b/hypervideo_dl/extractor/rottentomatoes.py
new file mode 100644
index 0000000..14c8e82
--- /dev/null
+++ b/hypervideo_dl/extractor/rottentomatoes.py
@@ -0,0 +1,32 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .internetvideoarchive import InternetVideoArchiveIE
+
+
+class RottenTomatoesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
+ 'info_dict': {
+ 'id': '11028566',
+ 'ext': 'mp4',
+ 'title': 'Toy Story 3',
+ 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ iva_id = self._search_regex(r'publishedid=(\d+)', webpage, 'internet video archive id')
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?domain=www.videodetective.com&customerid=69249&playerid=641&publishedid=' + iva_id,
+ 'ie_key': InternetVideoArchiveIE.ie_key(),
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ }
diff --git a/hypervideo_dl/extractor/roxwel.py b/hypervideo_dl/extractor/roxwel.py
new file mode 100644
index 0000000..6528464
--- /dev/null
+++ b/hypervideo_dl/extractor/roxwel.py
@@ -0,0 +1,53 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate, determine_ext
+
+
+class RoxwelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
+
+ _TEST = {
+ 'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html',
+ 'info_dict': {
+ 'id': 'passionpittakeawalklive',
+ 'ext': 'flv',
+ 'title': 'Take A Walk (live)',
+ 'uploader': 'Passion Pit',
+ 'uploader_id': 'passionpit',
+ 'upload_date': '20120928',
+ 'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ filename = mobj.group('filename')
+ info_url = 'http://www.roxwel.com/api/videos/%s' % filename
+ info = self._download_json(info_url, filename)
+
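+        # 'media_rates' appears to list one 'flv_<rate>' entry per available
+        # quality; pick the highest and ask the one-time-URL endpoint for it.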
+ rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')])
+ best_rate = rtmp_rates[-1]
+ url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
+ rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url')
+ ext = determine_ext(rtmp_url)
+ if ext == 'f4v':
+ rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)
+
+ return {
+ 'id': filename,
+ 'title': info['title'],
+ 'url': rtmp_url,
+ 'ext': 'flv',
+ 'description': info['description'],
+ 'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
+ 'uploader': info['artist'],
+ 'uploader_id': info['artistname'],
+ 'upload_date': unified_strdate(info['dbdate']),
+ }
diff --git a/hypervideo_dl/extractor/rozhlas.py b/hypervideo_dl/extractor/rozhlas.py
new file mode 100644
index 0000000..fccf694
--- /dev/null
+++ b/hypervideo_dl/extractor/rozhlas.py
@@ -0,0 +1,50 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ remove_start,
+)
+
+
+class RozhlasIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?prehravac\.rozhlas\.cz/audio/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://prehravac.rozhlas.cz/audio/3421320',
+ 'md5': '504c902dbc9e9a1fd50326eccf02a7e2',
+ 'info_dict': {
+ 'id': '3421320',
+ 'ext': 'mp3',
+ 'title': 'Echo Pavla Klusáka (30.06.2015 21:00)',
+ 'description': 'Osmdesátiny Terryho Rileyho jsou skvělou příležitostí proletět se elektronickými i akustickými díly zakladatatele minimalismu, který je aktivní už přes padesát let'
+ }
+ }, {
+ 'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://prehravac.rozhlas.cz/audio/%s' % audio_id, audio_id)
+
+ title = self._html_search_regex(
+ r'<h3>(.+?)</h3>\s*<p[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track',
+ webpage, 'title', default=None) or remove_start(
+ self._og_search_title(webpage), 'Radio Wave - ')
+ description = self._html_search_regex(
+ r'<p[^>]+title=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track',
+ webpage, 'description', fatal=False, group='url')
+ duration = int_or_none(self._search_regex(
+ r'data-duration=["\'](\d+)', webpage, 'duration', default=None))
+
+ return {
+ 'id': audio_id,
+ 'url': 'http://media.rozhlas.cz/_audio/%s.mp3' % audio_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'vcodec': 'none',
+ }
diff --git a/hypervideo_dl/extractor/rtbf.py b/hypervideo_dl/extractor/rtbf.py
new file mode 100644
index 0000000..3b0f308
--- /dev/null
+++ b/hypervideo_dl/extractor/rtbf.py
@@ -0,0 +1,161 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ strip_or_none,
+)
+
+
+class RTBFIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?rtbf\.be/
+ (?:
+ video/[^?]+\?.*\bid=|
+ ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
+ auvio/[^/]+\?.*\b(?P<live>l)?id=
+ )(?P<id>\d+)'''
+ _TESTS = [{
+ 'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
+ 'md5': '8c876a1cceeb6cf31b476461ade72384',
+ 'info_dict': {
+ 'id': '1921274',
+ 'ext': 'mp4',
+ 'title': 'Les Diables au coeur (épisode 2)',
+ 'description': '(du 25/04/2014)',
+ 'duration': 3099.54,
+ 'upload_date': '20140425',
+ 'timestamp': 1398456300,
+ }
+ }, {
+ # geo restricted
+ 'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
+ 'only_matching': True,
+ }, {
+ # Live
+ 'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
+ 'only_matching': True,
+ }, {
+ # Audio
+ 'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
+ 'only_matching': True,
+ }, {
+ # With Subtitle
+ 'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
+ 'only_matching': True,
+ }]
+ _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
+ _PROVIDERS = {
+ 'YOUTUBE': 'Youtube',
+ 'DAILYMOTION': 'Dailymotion',
+ 'VIMEO': 'Vimeo',
+ }
+ _QUALITIES = [
+ ('mobile', 'SD'),
+ ('web', 'MD'),
+ ('high', 'HD'),
+ ]
+
+ def _real_extract(self, url):
+ live, media_id = re.match(self._VALID_URL, url).groups()
+ embed_page = self._download_webpage(
+ 'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
+ media_id, query={'id': media_id})
+ data = self._parse_json(self._html_search_regex(
+ r'data-media="([^"]+)"', embed_page, 'media data'), media_id)
+
+ error = data.get('error')
+ if error:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ provider = data.get('provider')
+ if provider in self._PROVIDERS:
+ return self.url_result(data['url'], self._PROVIDERS[provider])
+
+ title = data['title']
+ is_live = data.get('isLive')
+ if is_live:
+ title = self._live_title(title)
+ height_re = r'-(\d+)p\.'
+ formats = []
+
+ m3u8_url = data.get('urlHlsAes128') or data.get('urlHls')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
+
+ fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x
+ http_url = data.get('url')
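+        # Derive progressive HTTP formats from the HLS variants: the direct
+        # MP4 URL embeds the height as '-<height>p.', so substitute each HLS
+        # variant's height into it.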
+ if formats and http_url and re.search(height_re, http_url):
+ http_url = fix_url(http_url)
+ for m3u8_f in formats[:]:
+ height = m3u8_f.get('height')
+ if not height:
+ continue
+ f = m3u8_f.copy()
+ del f['protocol']
+ f.update({
+ 'format_id': m3u8_f['format_id'].replace('hls-', 'http-'),
+ 'url': re.sub(height_re, '-%dp.' % height, http_url),
+ })
+ formats.append(f)
+ else:
+ sources = data.get('sources') or {}
+ for key, format_id in self._QUALITIES:
+ format_url = sources.get(key)
+ if not format_url:
+ continue
+ height = int_or_none(self._search_regex(
+ height_re, format_url, 'height', default=None))
+ formats.append({
+ 'format_id': format_id,
+ 'url': fix_url(format_url),
+ 'height': height,
+ })
+
+ mpd_url = data.get('urlDash')
+ if not data.get('drm') and mpd_url:
+ formats.extend(self._extract_mpd_formats(
+ mpd_url, media_id, mpd_id='dash', fatal=False))
+
+ audio_url = data.get('urlAudio')
+ if audio_url:
+ formats.append({
+ 'format_id': 'audio',
+ 'url': audio_url,
+ 'vcodec': 'none',
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for track in (data.get('tracks') or {}).values():
+ sub_url = track.get('url')
+ if not sub_url:
+ continue
+ subtitles.setdefault(track.get('lang') or 'fr', []).append({
+ 'url': sub_url,
+ })
+
+ return {
+ 'id': media_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': strip_or_none(data.get('description')),
+ 'thumbnail': data.get('thumbnail'),
+ 'duration': float_or_none(data.get('realDuration')),
+ 'timestamp': int_or_none(data.get('liveFrom')),
+ 'series': data.get('programLabel'),
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ }
diff --git a/hypervideo_dl/extractor/rte.py b/hypervideo_dl/extractor/rte.py
new file mode 100644
index 0000000..1fbc729
--- /dev/null
+++ b/hypervideo_dl/extractor/rte.py
@@ -0,0 +1,167 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ float_or_none,
+ parse_iso8601,
+ str_or_none,
+ try_get,
+ unescapeHTML,
+ url_or_none,
+ ExtractorError,
+)
+
+
+class RteBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ item_id = self._match_id(url)
+
+ info_dict = {}
+ formats = []
+
+ ENDPOINTS = (
+ 'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=',
+ 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=',
+ )
+
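+        # Query the endpoints in order and merge their formats; an error is
+        # only fatal on the last endpoint when nothing has been extracted yet.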
+ for num, ep_url in enumerate(ENDPOINTS, start=1):
+ try:
+ data = self._download_json(ep_url + item_id, item_id)
+ except ExtractorError as ee:
+ if num < len(ENDPOINTS) or formats:
+ continue
+ if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+ error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
+ if error_info:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error_info['message']),
+ expected=True)
+ raise
+
+ # NB the string values in the JSON are stored using XML escaping(!)
+ show = try_get(data, lambda x: x['shows'][0], dict)
+ if not show:
+ continue
+
+ if not info_dict:
+ title = unescapeHTML(show['title'])
+ description = unescapeHTML(show.get('description'))
+ thumbnail = show.get('thumbnail')
+ duration = float_or_none(show.get('duration'), 1000)
+ timestamp = parse_iso8601(show.get('published'))
+ info_dict = {
+ 'id': item_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ }
+
+ mg = try_get(show, lambda x: x['media:group'][0], dict)
+ if not mg:
+ continue
+
+ if mg.get('url'):
+ m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
+ if m:
+ m = m.groupdict()
+ formats.append({
+ 'url': m['url'] + '/' + m['app'],
+ 'app': m['app'],
+ 'play_path': m['playpath'],
+ 'player_url': url,
+ 'ext': 'flv',
+ 'format_id': 'rtmp',
+ })
+
+ if mg.get('hls_server') and mg.get('hls_url'):
+ formats.extend(self._extract_m3u8_formats(
+ mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+
+ if mg.get('hds_server') and mg.get('hds_url'):
+ formats.extend(self._extract_f4m_formats(
+ mg['hds_server'] + mg['hds_url'], item_id,
+ f4m_id='hds', fatal=False))
+
+ mg_rte_server = str_or_none(mg.get('rte:server'))
+ mg_url = str_or_none(mg.get('url'))
+ if mg_rte_server and mg_url:
+ hds_url = url_or_none(mg_rte_server + mg_url)
+ if hds_url:
+ formats.extend(self._extract_f4m_formats(
+ hds_url, item_id, f4m_id='hds', fatal=False))
+
+ self._sort_formats(formats)
+
+ info_dict['formats'] = formats
+ return info_dict
+
+
+class RteIE(RteBaseIE):
+ IE_NAME = 'rte'
+ IE_DESC = 'Raidió Teilifís Éireann TV'
+ _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
+ 'md5': '4a76eb3396d98f697e6e8110563d2604',
+ 'info_dict': {
+ 'id': '10478715',
+ 'ext': 'mp4',
+ 'title': 'iWitness',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'The spirit of Ireland, one voice and one minute at a time.',
+ 'duration': 60.046,
+ 'upload_date': '20151012',
+ 'timestamp': 1444694160,
+ },
+ }
+
+
+class RteRadioIE(RteBaseIE):
+ IE_NAME = 'rte:radio'
+ IE_DESC = 'Raidió Teilifís Éireann radio'
+ # Radioplayer URLs have two distinct specifier formats,
+ # the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
+ # the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
+ # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
+ # An <id> uniquely defines an individual recording, and is the only part we require.
+ _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'
+
+ _TESTS = [{
+ # Old-style player URL; HLS and RTMPE formats
+ 'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
+ 'md5': 'c79ccb2c195998440065456b69760411',
+ 'info_dict': {
+ 'id': '10507902',
+ 'ext': 'mp4',
+ 'title': 'Gloria',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:9ce124a7fb41559ec68f06387cabddf0',
+ 'timestamp': 1451203200,
+ 'upload_date': '20151227',
+ 'duration': 7230.0,
+ },
+ }, {
+ # New-style player URL; RTMPE formats only
+ 'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
+ 'info_dict': {
+ 'id': '3250678',
+ 'ext': 'flv',
+ 'title': 'The Lyric Concert with Paul Herriott',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': '',
+ 'timestamp': 1333742400,
+ 'upload_date': '20120406',
+ 'duration': 7199.016,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }]
diff --git a/hypervideo_dl/extractor/rtl2.py b/hypervideo_dl/extractor/rtl2.py
new file mode 100644
index 0000000..70f000c
--- /dev/null
+++ b/hypervideo_dl/extractor/rtl2.py
@@ -0,0 +1,207 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt
+from ..compat import (
+ compat_b64decode,
+ compat_ord,
+ compat_str,
+)
+from ..utils import (
+ bytes_to_intlist,
+ ExtractorError,
+ intlist_to_bytes,
+ int_or_none,
+ strip_or_none,
+)
+
+
+class RTL2IE(InfoExtractor):
+ IE_NAME = 'rtl2'
+ _VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P<vico_id>\d+)[^/]+/(?P<vivi_id>\d+)-|folge/)(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
+ 'info_dict': {
+ 'id': 'folge-203-0',
+ 'ext': 'f4v',
+ 'title': 'GRIP sucht den Sommerkönig',
+ 'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f'
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+ }, {
+ 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
+ 'info_dict': {
+ 'id': 'anna-erwischt-alex',
+ 'ext': 'mp4',
+ 'title': 'Anna erwischt Alex!',
+ 'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.'
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+ }]
+
+ def _real_extract(self, url):
+ vico_id, vivi_id, display_id = re.match(self._VALID_URL, url).groups()
+ if not vico_id:
+ webpage = self._download_webpage(url, display_id)
+
+ mobj = re.search(
+ r'data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
+ webpage)
+ if mobj:
+ vico_id = mobj.group('vico_id')
+ vivi_id = mobj.group('vivi_id')
+ else:
+ vico_id = self._html_search_regex(
+ r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
+ vivi_id = self._html_search_regex(
+ r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
+
+ info = self._download_json(
+ 'https://service.rtl2.de/api-player-vipo/video.php',
+ display_id, query={
+ 'vico_id': vico_id,
+ 'vivi_id': vivi_id,
+ })
+ video_info = info['video']
+ title = video_info['titel']
+
+ formats = []
+
+ rtmp_url = video_info.get('streamurl')
+ if rtmp_url:
+ rtmp_url = rtmp_url.replace('\\', '')
+ stream_url = 'mp4:' + self._html_search_regex(r'/ondemand/(.+)', rtmp_url, 'stream URL')
+ rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0']
+
+ formats.append({
+ 'format_id': 'rtmp',
+ 'url': rtmp_url,
+ 'play_path': stream_url,
+ 'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf',
+ 'page_url': url,
+ 'flash_version': 'LNX 11,2,202,429',
+ 'rtmp_conn': rtmp_conn,
+ 'no_resume': True,
+ 'preference': 1,
+ })
+
+ m3u8_url = video_info.get('streamurl_hls')
+ if m3u8_url:
+ formats.extend(self._extract_akamai_formats(m3u8_url, display_id))
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': display_id,
+ 'title': title,
+ 'thumbnail': video_info.get('image'),
+ 'description': video_info.get('beschreibung'),
+ 'duration': int_or_none(video_info.get('duration')),
+ 'formats': formats,
+ }
+
+
+class RTL2YouBaseIE(InfoExtractor):
+ _BACKWERK_BASE_URL = 'https://p-you-backwerk.rtl2apps.de/'
+
+
+class RTL2YouIE(RTL2YouBaseIE):
+ IE_NAME = 'rtl2:you'
+    _VALID_URL = r'https?://you\.rtl2\.de/(?:video/\d+/|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du',
+ 'info_dict': {
+ 'id': '15740',
+ 'ext': 'mp4',
+ 'title': 'MJUNIK – Home of YOU - #307 Hirn, wo bist du?!',
+ 'description': 'md5:ddaa95c61b372b12b66e115b2772fe01',
+ 'age_limit': 12,
+ },
+ }, {
+ 'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712',
+ 'only_matching': True,
+ }]
+ _AES_KEY = b'\xe9W\xe4.<*\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03*\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!'
+ _GEO_COUNTRIES = ['DE']
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ stream_data = self._download_json(
+ self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)
+
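+ # streamUrl decodes to "<base64 ciphertext>:<base64 IV>"; the ciphertext
+ # is AES-CBC encrypted with the hardcoded key above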
+ data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':')
+ stream_url = intlist_to_bytes(aes_cbc_decrypt(
+ bytes_to_intlist(compat_b64decode(data)),
+ bytes_to_intlist(self._AES_KEY),
+ bytes_to_intlist(compat_b64decode(iv))
+ ))
+ if b'rtl2_you_video_not_found' in stream_url:
+ raise ExtractorError('video not found', expected=True)
+
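+ # the last plaintext byte gives the padding length (PKCS#7-style), which
+ # is stripped before using the decrypted m3u8 URL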
+ formats = self._extract_m3u8_formats(
+ stream_url[:-compat_ord(stream_url[-1])].decode(),
+ video_id, 'mp4', 'm3u8_native')
+ self._sort_formats(formats)
+
+ video_data = self._download_json(
+ self._BACKWERK_BASE_URL + 'video/' + video_id, video_id)
+
+ series = video_data.get('formatTitle')
+ title = episode = video_data.get('title') or series
+ if series and series != title:
+ title = '%s - %s' % (series, title)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': strip_or_none(video_data.get('description')),
+ 'thumbnail': video_data.get('image'),
+ 'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000),  # durations are reported in milliseconds
+ 'series': series,
+ 'episode': episode,
+ 'age_limit': int_or_none(video_data.get('minimumAge')),
+ }
+
+
+class RTL2YouSeriesIE(RTL2YouBaseIE):
+ IE_NAME = 'rtl2:you:series'
+ _VALID_URL = r'https?://you\.rtl2\.de/videos/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://you.rtl2.de/videos/115/dragon-ball',
+ 'info_dict': {
+ 'id': '115',
+ },
+ 'playlist_mincount': 5,
+ }
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+ stream_data = self._download_json(
+ self._BACKWERK_BASE_URL + 'videos',
+ series_id, query={
+ 'formatId': series_id,
+ 'limit': 1000000000,
+ })
+
+ entries = []
+ for video in stream_data.get('videos', []):
+ video_id = compat_str(video['videoId'])
+ if not video_id:
+ continue
+ entries.append(self.url_result(
+ 'http://you.rtl2.de/video/%s/%s' % (series_id, video_id),
+ 'RTL2You', video_id))
+ return self.playlist_result(entries, series_id)
diff --git a/hypervideo_dl/extractor/rtlnl.py b/hypervideo_dl/extractor/rtlnl.py
new file mode 100644
index 0000000..9eaa06f
--- /dev/null
+++ b/hypervideo_dl/extractor/rtlnl.py
@@ -0,0 +1,146 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+)
+
+
+class RtlNlIE(InfoExtractor):
+ IE_NAME = 'rtl.nl'
+ IE_DESC = 'rtl.nl and rtlxl.nl'
+ _VALID_URL = r'''(?x)
+ https?://(?:(?:www|static)\.)?
+ (?:
+ rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/|
+ rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)|
+ embed\.rtl\.nl/\#uuid=
+ )
+ (?P<id>[0-9a-f-]+)'''
+
+ _TESTS = [{
+ # new URL schema
+ 'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f',
+ 'md5': '490428f1187b60d714f34e1f2e3af0b6',
+ 'info_dict': {
+ 'id': '0bd1384d-d970-3086-98bb-5c104e10c26f',
+ 'ext': 'mp4',
+ 'title': 'RTL Nieuws',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'timestamp': 1593293400,
+ 'upload_date': '20200627',
+ 'duration': 661.08,
+ },
+ }, {
+ # old URL schema
+ 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
+ 'md5': '473d1946c1fdd050b2c0161a4b13c373',
+ 'info_dict': {
+ 'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
+ 'ext': 'mp4',
+ 'title': 'RTL Nieuws',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'timestamp': 1461951000,
+ 'upload_date': '20160429',
+ 'duration': 1167.96,
+ },
+ 'skip': '404',
+ }, {
+ # best format available a3t
+ 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
+ 'md5': 'dea7474214af1271d91ef332fb8be7ea',
+ 'info_dict': {
+ 'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
+ 'ext': 'mp4',
+ 'timestamp': 1424039400,
+ 'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
+ 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
+ 'upload_date': '20150215',
+ 'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
+ }
+ }, {
+ # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275)
+ # best format available nettv
+ 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
+ 'info_dict': {
+ 'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
+ 'ext': 'mp4',
+ 'title': 'RTL Nieuws - Meer beelden van overval juwelier',
+ 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
+ 'timestamp': 1437233400,
+ 'upload_date': '20150718',
+ 'duration': 30.474,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # encrypted m3u8 streams, georestricted
+ 'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
+ 'only_matching': True,
+ }, {
+ # new embed URL schema
+ 'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ uuid = self._match_id(url)
+ info = self._download_json(
+ 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
+ uuid)
+
+ material = info['material'][0]
+ title = info['abstracts'][0]['name']
+ subtitle = material.get('title')
+ if subtitle:
+ title += ' - %s' % subtitle
+ description = material.get('synopsis')
+
+ meta = info.get('meta', {})
+
+ videopath = material['videopath']
+ m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
+ self._sort_formats(formats)
+
+ thumbnails = []
+
+ for p in ('poster_base_url', 'thumb_base_url'):
+ if not meta.get(p):
+ continue
+
+ thumbnails.append({
+ 'url': self._proto_relative_url(meta[p] + uuid),
+ 'width': int_or_none(self._search_regex(
+ r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
+ 'height': int_or_none(self._search_regex(
+ r'/sz=[0-9]+x([0-9]+)',
+ meta[p], 'thumbnail height', fatal=False))
+ })
+
+ return {
+ 'id': uuid,
+ 'title': title,
+ 'formats': formats,
+ 'timestamp': material['original_date'],
+ 'description': description,
+ 'duration': parse_duration(material.get('duration')),
+ 'thumbnails': thumbnails,
+ }
diff --git a/hypervideo_dl/extractor/rtp.py b/hypervideo_dl/extractor/rtp.py
new file mode 100644
index 0000000..02986f4
--- /dev/null
+++ b/hypervideo_dl/extractor/rtp.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ js_to_json,
+)
+
+
+class RTPIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
+ _TESTS = [{
+ 'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
+ 'md5': 'e736ce0c665e459ddb818546220b4ef8',
+ 'info_dict': {
+ 'id': 'e174042',
+ 'ext': 'mp3',
+ 'title': 'Paixões Cruzadas',
+ 'description': 'As paixões musicais de António Cartaxo e António Macedo',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_search_meta(
+ 'twitter:title', webpage, display_name='title', fatal=True)
+
+ config = self._parse_json(self._search_regex(
+ r'(?s)RTPPlayer\(({.+?})\);', webpage,
+ 'player config'), video_id, js_to_json)
+ file_url = config['file']
+ ext = determine_ext(file_url)
+ if ext == 'm3u8':
+ file_key = config.get('fileKey')
+ formats = self._extract_m3u8_formats(
+ file_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=file_key)
+ if file_key:
+ formats.append({
+ 'url': 'https://cdn-ondemand.rtp.pt' + file_key,
+ 'preference': 1,
+ })
+ self._sort_formats(formats)
+ else:
+ formats = [{
+ 'url': file_url,
+ 'ext': ext,
+ }]
+ if config.get('mediaType') == 'audio':
+ for f in formats:
+ f['vcodec'] = 'none'
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': self._html_search_meta(['description', 'twitter:description'], webpage),
+ 'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
+ }
diff --git a/hypervideo_dl/extractor/rts.py b/hypervideo_dl/extractor/rts.py
new file mode 100644
index 0000000..aed35f8
--- /dev/null
+++ b/hypervideo_dl/extractor/rts.py
@@ -0,0 +1,235 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .srgssr import SRGSSRIE
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ unescapeHTML,
+ urljoin,
+)
+
+
+class RTSIE(SRGSSRIE):
+ IE_DESC = 'RTS.ch'
+ _VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
+ 'md5': '753b877968ad8afaeddccc374d4256a5',
+ 'info_dict': {
+ 'id': '3449373',
+ 'display_id': 'les-enfants-terribles',
+ 'ext': 'mp4',
+ 'duration': 1488,
+ 'title': 'Les Enfants Terribles',
+ 'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
+ 'uploader': 'Divers',
+ 'upload_date': '19680921',
+ 'timestamp': -40280400,
+ 'thumbnail': r're:^https?://.*\.image',
+ 'view_count': int,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+ },
+ {
+ 'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
+ 'info_dict': {
+ 'id': '5624065',
+ 'title': 'Passe-moi les jumelles',
+ },
+ 'playlist_mincount': 4,
+ },
+ {
+ 'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
+ 'info_dict': {
+ 'id': '5745975',
+ 'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
+ 'ext': 'mp4',
+ 'duration': 48,
+ 'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
+ 'description': 'Hockey - Playoff',
+ 'uploader': 'Hockey',
+ 'upload_date': '20140403',
+ 'timestamp': 1396556882,
+ 'thumbnail': r're:^https?://.*\.image',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+ 'skip': 'Blocked outside Switzerland',
+ },
+ {
+ 'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
+ 'md5': '9bb06503773c07ce83d3cbd793cebb91',
+ 'info_dict': {
+ 'id': '5745356',
+ 'display_id': 'londres-cachee-par-un-epais-smog',
+ 'ext': 'mp4',
+ 'duration': 33,
+ 'title': 'Londres cachée par un épais smog',
+ 'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
+ 'uploader': 'L\'actu en vidéo',
+ 'upload_date': '20140403',
+ 'timestamp': 1396537322,
+ 'thumbnail': r're:^https?://.*\.image',
+ 'view_count': int,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+ },
+ {
+ 'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
+ 'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
+ 'info_dict': {
+ 'id': '5706148',
+ 'display_id': 'urban-hippie-de-damien-krisl-03-04-2014',
+ 'ext': 'mp3',
+ 'duration': 123,
+ 'title': '"Urban Hippie", de Damien Krisl',
+ 'description': 'Des Hippies super glam.',
+ 'upload_date': '20140403',
+ 'timestamp': 1396551600,
+ },
+ },
+ {
+ # article with videos on rhs
+ 'url': 'http://www.rts.ch/sport/hockey/6693917-hockey-davos-decroche-son-31e-titre-de-champion-de-suisse.html',
+ 'info_dict': {
+ 'id': '6693917',
+ 'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse',
+ },
+ 'playlist_mincount': 5,
+ },
+ {
+ 'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ m = re.match(self._VALID_URL, url)
+ media_id = m.group('rts_id') or m.group('id')
+ display_id = m.group('display_id') or media_id
+
+ def download_json(internal_id):
+ return self._download_json(
+ 'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
+ display_id)
+
+ all_info = download_json(media_id)
+
+ # the media_id extracted from the URL is not always a real media id
+ if 'video' not in all_info and 'audio' not in all_info:
+ entries = []
+
+ for item in all_info.get('items', []):
+ item_url = item.get('url')
+ if not item_url:
+ continue
+ entries.append(self.url_result(item_url, 'RTS'))
+
+ if not entries:
+ page, urlh = self._download_webpage_handle(url, display_id)
+ if re.match(self._VALID_URL, urlh.geturl()).group('id') != media_id:
+ return self.url_result(urlh.geturl(), 'RTS')
+
+ # article with videos on rhs
+ videos = re.findall(
+ r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"',
+ page)
+ if not videos:
+ videos = re.findall(
+ r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"',
+ page)
+ if videos:
+ entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos]
+
+ if entries:
+ return self.playlist_result(entries, media_id, all_info.get('title'))
+
+ internal_id = self._html_search_regex(
+ r'<(?:video|audio) data-id="([0-9]+)"', page,
+ 'internal video id')
+ all_info = download_json(internal_id)
+
+ media_type = 'video' if 'video' in all_info else 'audio'
+
+ # check for errors
+ self._get_media_data('rts', media_type, media_id)
+
+ info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
+
+ title = info['title']
+
+ def extract_bitrate(url):
+ return int_or_none(self._search_regex(
+ r'-([0-9]+)k\.', url, 'bitrate', default=None))
+
+ formats = []
+ streams = info.get('streams', {})
+ for format_id, format_url in streams.items():
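+ # the *_sd variants are redundant whenever the corresponding full
+ # hds/hls stream is present, so skip them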
+ if format_id == 'hds_sd' and 'hds' in streams:
+ continue
+ if format_id == 'hls_sd' and 'hls' in streams:
+ continue
+ ext = determine_ext(format_url)
+ if ext in ('m3u8', 'f4m'):
+ format_url = self._get_tokenized_src(format_url, media_id, format_id)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url + ('?' if '?' not in format_url else '&') + 'hdcore=3.4.0',
+ media_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.extend(self._extract_m3u8_formats(
+ format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'tbr': extract_bitrate(format_url),
+ })
+
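+ # media entries with relative URLs are progressive downloads served
+ # from a dedicated host (audio content uses the -a variant)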
+ download_base = 'http://rtsww%s-d.rts.ch/' % ('-a' if media_type == 'audio' else '')
+ for media in info.get('media', []):
+ media_url = media.get('url')
+ if not media_url or re.match(r'https?://', media_url):
+ continue
+ rate = media.get('rate')
+ ext = media.get('ext') or determine_ext(media_url, 'mp4')
+ format_id = ext
+ if rate:
+ format_id += '-%dk' % rate
+ formats.append({
+ 'format_id': format_id,
+ 'url': urljoin(download_base, media_url),
+ 'tbr': rate or extract_bitrate(media_url),
+ })
+
+ self._check_formats(formats, media_id)
+ self._sort_formats(formats)
+
+ duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
+ if isinstance(duration, compat_str):
+ duration = parse_duration(duration)
+
+ return {
+ 'id': media_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': info.get('intro'),
+ 'duration': duration,
+ 'view_count': int_or_none(info.get('plays')),
+ 'uploader': info.get('programName'),
+ 'timestamp': parse_iso8601(info.get('broadcast_date')),
+ 'thumbnail': unescapeHTML(info.get('preview_image_url')),
+ }
diff --git a/hypervideo_dl/extractor/rtve.py b/hypervideo_dl/extractor/rtve.py
new file mode 100644
index 0000000..d2fb754
--- /dev/null
+++ b/hypervideo_dl/extractor/rtve.py
@@ -0,0 +1,268 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import io
+import re
+import sys
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_struct_unpack,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ qualities,
+ remove_end,
+ remove_start,
+ std_headers,
+)
+
+_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x))
+
+
+class RTVEALaCartaIE(InfoExtractor):
+ IE_NAME = 'rtve.es:alacarta'
+ IE_DESC = 'RTVE a la carta'
+ _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
+ 'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
+ 'info_dict': {
+ 'id': '2491869',
+ 'ext': 'mp4',
+ 'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
+ 'duration': 5024.566,
+ 'series': 'Balonmano',
+ },
+ 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
+ }, {
+ 'note': 'Live stream',
+ 'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
+ 'info_dict': {
+ 'id': '1694255',
+ 'ext': 'mp4',
+ 'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': 'live stream',
+ },
+ }, {
+ 'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
+ 'md5': 'd850f3c8731ea53952ebab489cf81cbf',
+ 'info_dict': {
+ 'id': '4236788',
+ 'ext': 'mp4',
+ 'title': 'Servir y proteger - Capítulo 104',
+ 'duration': 3222.0,
+ },
+ 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
+ }, {
+ 'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
+ 'only_matching': True,
+ }]
+
+ def _real_initialize(self):
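+ # /odin/loki/ returns a 'manager' token for the given User-Agent; it is
+ # later interpolated into the PNG URL used for format extraction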
+ user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
+ self._manager = self._download_json(
+ 'http://www.rtve.es/odin/loki/' + user_agent_b64,
+ None, 'Fetching manager info')['manager']
+
+ @staticmethod
+ def _decrypt_url(png):
+ encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
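+ # the payload is a PNG (the 8-byte signature was skipped above); walk
+ # its chunks until IEND and mine tEXt chunks for the obfuscated
+ # quality/URL pairs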
+ while True:
+ length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
+ chunk_type = encrypted_data.read(4)
+ if chunk_type == b'IEND':
+ break
+ data = encrypted_data.read(length)
+ if chunk_type == b'tEXt':
+ alphabet_data, text = data.split(b'\0')
+ quality, url_data = text.split(b'%%')
+ alphabet = []
+ e = 0
+ d = 0
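+ # alphabet_data interleaves real characters with filler bytes: keep
+ # one byte, then skip a cyclically varying number of bytes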
+ for l in _bytes_to_chr(alphabet_data):
+ if d == 0:
+ alphabet.append(l)
+ d = e = (e + 1) % 4
+ else:
+ d -= 1
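+ # each URL character is a two-digit index into the alphabet; filler
+ # digits between the tens and units digit are skipped in a 4-step cycle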
+ url = ''
+ f = 0
+ e = 3
+ b = 1
+ for letter in _bytes_to_chr(url_data):
+ if f == 0:
+ l = int(letter) * 10
+ f = 1
+ else:
+ if e == 0:
+ l += int(letter)
+ url += alphabet[l]
+ e = (b + 3) % 4
+ f = 0
+ b += 1
+ else:
+ e -= 1
+
+ yield quality.decode(), url
+ encrypted_data.read(4) # CRC
+
+ def _extract_png_formats(self, video_id):
+ png = self._download_webpage(
+ 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id),
+ video_id, 'Downloading url information', query={'q': 'v2'})
+ q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
+ formats = []
+ for quality, video_url in self._decrypt_url(png):
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, 'dash', fatal=False))
+ else:
+ formats.append({
+ 'format_id': quality,
+ 'quality': q(quality),
+ 'url': video_url,
+ })
+ self._sort_formats(formats)
+ return formats
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._download_json(
+ 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
+ video_id)['page']['items'][0]
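+ # state 'DESPU' (presumably short for 'despublicado') marks videos
+ # that have been taken down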
+ if info['state'] == 'DESPU':
+ raise ExtractorError('The video is no longer available', expected=True)
+ title = info['title'].strip()
+ formats = self._extract_png_formats(video_id)
+
+ subtitles = None
+ sbt_file = info.get('sbtFile')
+ if sbt_file:
+ subtitles = self.extract_subtitles(video_id, sbt_file)
+
+ is_live = info.get('live') is True
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'formats': formats,
+ 'thumbnail': info.get('image'),
+ 'subtitles': subtitles,
+ 'duration': float_or_none(info.get('duration'), 1000),
+ 'is_live': is_live,
+ 'series': info.get('programTitle'),
+ }
+
+ def _get_subtitles(self, video_id, sub_file):
+ subs = self._download_json(
+ sub_file + '.json', video_id,
+ 'Downloading subtitles info')['page']['items']
+ return dict(
+ (s['lang'], [{'ext': 'vtt', 'url': s['src']}])
+ for s in subs)
+
+
+class RTVEInfantilIE(RTVEALaCartaIE):
+ IE_NAME = 'rtve.es:infantil'
+ IE_DESC = 'RTVE infantil'
+ _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
+
+ _TESTS = [{
+ 'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
+ 'md5': '5747454717aedf9f9fdf212d1bcfc48d',
+ 'info_dict': {
+ 'id': '3040283',
+ 'ext': 'mp4',
+ 'title': 'Maneras de vivir',
+ 'thumbnail': r're:https?://.+/1426182947956\.JPG',
+ 'duration': 357.958,
+ },
+ 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
+ }]
+
+
+class RTVELiveIE(RTVEALaCartaIE):
+ IE_NAME = 'rtve.es:live'
+ IE_DESC = 'RTVE.es live streams'
+ _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.rtve.es/directo/la-1/',
+ 'info_dict': {
+ 'id': 'la-1',
+ 'ext': 'mp4',
+ 'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ 'skip_download': 'live stream',
+ }
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
+ title = remove_start(title, 'Estoy viendo ')
+
+ vidplayer_id = self._search_regex(
+ (r'playerId=player([0-9]+)',
+ r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
+ r'data-id=["\'](\d+)'),
+ webpage, 'internal video ID')
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title),
+ 'formats': self._extract_png_formats(vidplayer_id),
+ 'is_live': True,
+ }
+
+
+class RTVETelevisionIE(InfoExtractor):
+ IE_NAME = 'rtve.es:television'
+ _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+)\.shtml'
+
+ _TEST = {
+ 'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
+ 'info_dict': {
+ 'id': '3069778',
+ 'ext': 'mp4',
+ 'title': 'Documentos TV - La revolución del móvil',
+ 'duration': 3496.948,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+
+ alacarta_url = self._search_regex(
+ r'data-location="alacarta_videos"[^<]+url&quot;:&quot;(http://www\.rtve\.es/alacarta.+?)&',
+ webpage, 'alacarta url', default=None)
+ if alacarta_url is None:
+ raise ExtractorError(
+ 'The webpage doesn\'t contain any video', expected=True)
+
+ return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())
diff --git a/hypervideo_dl/extractor/rtvnh.py b/hypervideo_dl/extractor/rtvnh.py
new file mode 100644
index 0000000..6a00f70
--- /dev/null
+++ b/hypervideo_dl/extractor/rtvnh.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class RTVNHIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.rtvnh.nl/video/131946',
+ 'md5': 'cdbec9f44550763c8afc96050fa747dc',
+ 'info_dict': {
+ 'id': '131946',
+ 'ext': 'mp4',
+ 'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw',
+ 'thumbnail': r're:^https?:.*\.jpg$'
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ meta = self._parse_json(self._download_webpage(
+ 'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id)
+
+ status = meta.get('status')
+ if status != 200:
+ raise ExtractorError(
+ '%s returned error code %d' % (self.IE_NAME, status), expected=True)
+
+ formats = []
+ rtmp_formats = self._extract_smil_formats(
+ 'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id)
+ formats.extend(rtmp_formats)
+
+ for rtmp_format in rtmp_formats:
+ rtmp_url = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
+ rtsp_format = rtmp_format.copy()
+ del rtsp_format['play_path']
+ del rtsp_format['ext']
+ rtsp_format.update({
+ 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
+ 'url': rtmp_url.replace('rtmp://', 'rtsp://'),
+ 'protocol': 'rtsp',
+ })
+ formats.append(rtsp_format)
+ http_base_url = rtmp_url.replace('rtmp://', 'http://')
+ formats.extend(self._extract_m3u8_formats(
+ http_base_url + '/playlist.m3u8', video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+ formats.extend(self._extract_f4m_formats(
+ http_base_url + '/manifest.f4m',
+ video_id, f4m_id='hds', fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': meta['title'].strip(),
+ 'thumbnail': meta.get('image'),
+ 'formats': formats
+ }
diff --git a/hypervideo_dl/extractor/rtvs.py b/hypervideo_dl/extractor/rtvs.py
new file mode 100644
index 0000000..6573b26
--- /dev/null
+++ b/hypervideo_dl/extractor/rtvs.py
@@ -0,0 +1,47 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class RTVSIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv/\d+/(?P<id>\d+)'
+ _TESTS = [{
+ # radio archive
+ 'url': 'http://www.rtvs.sk/radio/archiv/11224/414872',
+ 'md5': '134d5d6debdeddf8a5d761cbc9edacb8',
+ 'info_dict': {
+ 'id': '414872',
+ 'ext': 'mp3',
+ 'title': 'Ostrov pokladov 1 časť.mp3'
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # tv archive
+ 'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118',
+ 'md5': '85e2c55cf988403b70cac24f5c086dc6',
+ 'info_dict': {
+ 'id': '63118',
+ 'ext': 'mp4',
+ 'title': 'Amaro Džives - Náš deň',
+ 'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.'
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ playlist_url = self._search_regex(
+ r'playlist["\']?\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'playlist url', group='url')
+
+ data = self._download_json(
+ playlist_url, video_id, 'Downloading playlist')[0]
+ return self._parse_jwplayer_data(data, video_id=video_id)
diff --git a/hypervideo_dl/extractor/ruhd.py b/hypervideo_dl/extractor/ruhd.py
new file mode 100644
index 0000000..3c8053a
--- /dev/null
+++ b/hypervideo_dl/extractor/ruhd.py
@@ -0,0 +1,45 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class RUHDIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.ruhd.ru/play.php?vid=207',
+ 'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83',
+ 'info_dict': {
+ 'id': '207',
+ 'ext': 'divx',
+ 'title': 'КОТ бааааам',
+ 'description': 'классный кот)',
+ 'thumbnail': r're:^http://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._html_search_regex(
+ r'<param name="src" value="([^"]+)"', webpage, 'video url')
+ title = self._html_search_regex(
+ r'<title>([^<]+)&nbsp;&nbsp; RUHD\.ru - Видео Высокого качества №1 в России!</title>',
+ webpage, 'title')
+ description = self._html_search_regex(
+ r'(?s)<div id="longdesc">(.+?)<span id="showlink">',
+ webpage, 'description', fatal=False)
+ thumbnail = self._html_search_regex(
+ r'<param name="previewImage" value="([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
+ if thumbnail:
+ thumbnail = 'http://www.ruhd.ru' + thumbnail
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/rumble.py b/hypervideo_dl/extractor/rumble.py
new file mode 100644
index 0000000..4a02251
--- /dev/null
+++ b/hypervideo_dl/extractor/rumble.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class RumbleEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
+ _TESTS = [{
+ 'url': 'https://rumble.com/embed/v5pv5f',
+ 'md5': '36a18a049856720189f30977ccbb2c34',
+ 'info_dict': {
+ 'id': 'v5pv5f',
+ 'ext': 'mp4',
+ 'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
+ 'timestamp': 1571611968,
+ 'upload_date': '20191020',
+ }
+ }, {
+ 'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video = self._download_json(
+ 'https://rumble.com/embedJS/', video_id,
+ query={'request': 'video', 'v': video_id})
+ title = video['title']
+
+ formats = []
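+ # each 'ua' entry appears to hold two candidate stream URLs at indices
+ # 0-1, with matching metadata dicts (carrying the bitrate) at 2-3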
+ for height, ua in (video.get('ua') or {}).items():
+ for i in range(2):
+ f_url = try_get(ua, lambda x: x[i], compat_str)
+ if f_url:
+ ext = determine_ext(f_url)
+ f = {
+ 'ext': ext,
+ 'format_id': '%s-%sp' % (ext, height),
+ 'height': int_or_none(height),
+ 'url': f_url,
+ }
+ bitrate = try_get(ua, lambda x: x[i + 2]['bitrate'])
+ if bitrate:
+ f['tbr'] = int_or_none(bitrate)
+ formats.append(f)
+ self._sort_formats(formats)
+
+ author = video.get('author') or {}
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': video.get('i'),
+ 'timestamp': parse_iso8601(video.get('pubDate')),
+ 'channel': author.get('name'),
+ 'channel_url': author.get('url'),
+ 'duration': int_or_none(video.get('duration')),
+ }
diff --git a/hypervideo_dl/extractor/rutube.py b/hypervideo_dl/extractor/rutube.py
new file mode 100644
index 0000000..8f54d56
--- /dev/null
+++ b/hypervideo_dl/extractor/rutube.py
@@ -0,0 +1,313 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ bool_or_none,
+ int_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class RutubeBaseIE(InfoExtractor):
+ def _download_api_info(self, video_id, query=None):
+ if not query:
+ query = {}
+ query['format'] = 'json'
+ return self._download_json(
+ 'http://rutube.ru/api/video/%s/' % video_id,
+ video_id, 'Downloading video JSON',
+ 'Unable to download video JSON', query=query)
+
+ @staticmethod
+ def _extract_info(video, video_id=None, require_title=True):
+ title = video['title'] if require_title else video.get('title')
+
+ age_limit = video.get('is_adult')
+ if age_limit is not None:
+ age_limit = 18 if age_limit is True else 0
+
+ uploader_id = try_get(video, lambda x: x['author']['id'])
+ category = try_get(video, lambda x: x['category']['name'])
+
+ return {
+ 'id': (video.get('id') or video_id) if video_id else video['id'],
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': video.get('thumbnail_url'),
+ 'duration': int_or_none(video.get('duration')),
+ 'uploader': try_get(video, lambda x: x['author']['name']),
+ 'uploader_id': compat_str(uploader_id) if uploader_id else None,
+ 'timestamp': unified_timestamp(video.get('created_ts')),
+ 'category': [category] if category else None,
+ 'age_limit': age_limit,
+ 'view_count': int_or_none(video.get('hits')),
+ 'comment_count': int_or_none(video.get('comments_count')),
+ 'is_live': bool_or_none(video.get('is_livestream')),
+ }
+
+ def _download_and_extract_info(self, video_id, query=None):
+ return self._extract_info(
+ self._download_api_info(video_id, query=query), video_id)
+
+ def _download_api_options(self, video_id, query=None):
+ if not query:
+ query = {}
+ query['format'] = 'json'
+ return self._download_json(
+ 'http://rutube.ru/api/play/options/%s/' % video_id,
+ video_id, 'Downloading options JSON',
+ 'Unable to download options JSON',
+ headers=self.geo_verification_headers(), query=query)
+
+ def _extract_formats(self, options, video_id):
+ formats = []
+ for format_id, format_url in options['video_balancer'].items():
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ self._sort_formats(formats)
+ return formats
+
+ def _download_and_extract_formats(self, video_id, query=None):
+ return self._extract_formats(
+ self._download_api_options(video_id, query=query), video_id)
+
+
+class RutubeIE(RutubeBaseIE):
+ IE_NAME = 'rutube'
+ IE_DESC = 'Rutube videos'
+ _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
+
+ _TESTS = [{
+ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
+ 'md5': '1d24f180fac7a02f3900712e5a5764d6',
+ 'info_dict': {
+ 'id': '3eac3b4561676c17df9132a9a1e62e3e',
+ 'ext': 'mp4',
+ 'title': 'Раненный кенгуру забежал в аптеку',
+ 'description': 'http://www.ntdtv.ru ',
+ 'duration': 81,
+ 'uploader': 'NTDRussian',
+ 'uploader_id': '29790',
+ 'timestamp': 1381943602,
+ 'upload_date': '20131016',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url)
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [mobj.group('url') for mobj in re.finditer(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._download_and_extract_info(video_id)
+ info['formats'] = self._download_and_extract_formats(video_id)
+ return info
+
+
+class RutubeEmbedIE(RutubeBaseIE):
+ IE_NAME = 'rutube:embed'
+ IE_DESC = 'Rutube embedded videos'
+ _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
+ 'info_dict': {
+ 'id': 'a10e53b86e8f349080f718582ce4c661',
+ 'ext': 'mp4',
+ 'timestamp': 1387830582,
+ 'upload_date': '20131223',
+ 'uploader_id': '297833',
+ 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
+ 'uploader': 'subziro89 ILya',
+ 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://rutube.ru/play/embed/8083783',
+ 'only_matching': True,
+ }, {
+ # private video
+ 'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ embed_id = self._match_id(url)
+ # The query may contain a private video token and should be passed on
+ # to API requests (see #19163)
+ query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ options = self._download_api_options(embed_id, query)
+ video_id = options['effective_video']
+ formats = self._extract_formats(options, video_id)
+ info = self._download_and_extract_info(video_id, query)
+ info.update({
+ 'extractor_key': 'Rutube',
+ 'formats': formats,
+ })
+ return info
+
+
+class RutubePlaylistBaseIE(RutubeBaseIE):
+ def _next_page_url(self, page_num, playlist_id, *args, **kwargs):
+ return self._PAGE_TEMPLATE % (playlist_id, page_num)
+
+ def _entries(self, playlist_id, *args, **kwargs):
+ next_page_url = None
+ for pagenum in itertools.count(1):
+ page = self._download_json(
+ next_page_url or self._next_page_url(
+ pagenum, playlist_id, *args, **kwargs),
+ playlist_id, 'Downloading page %s' % pagenum)
+
+ results = page.get('results')
+ if not results or not isinstance(results, list):
+ break
+
+ for result in results:
+ video_url = url_or_none(result.get('video_url'))
+ if not video_url:
+ continue
+ entry = self._extract_info(result, require_title=False)
+ entry.update({
+ '_type': 'url',
+ 'url': video_url,
+ 'ie_key': RutubeIE.ie_key(),
+ })
+ yield entry
+
+ next_page_url = page.get('next')
+ if not next_page_url or not page.get('has_next'):
+ break
+
+ def _extract_playlist(self, playlist_id, *args, **kwargs):
+ return self.playlist_result(
+ self._entries(playlist_id, *args, **kwargs),
+ playlist_id, kwargs.get('playlist_name'))
+
+ def _real_extract(self, url):
+ return self._extract_playlist(self._match_id(url))
+
+
+class RutubeChannelIE(RutubePlaylistBaseIE):
+ IE_NAME = 'rutube:channel'
+ IE_DESC = 'Rutube channels'
+ _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://rutube.ru/tags/video/1800/',
+ 'info_dict': {
+ 'id': '1800',
+ },
+ 'playlist_mincount': 68,
+ }]
+
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
+
+
+class RutubeMovieIE(RutubePlaylistBaseIE):
+ IE_NAME = 'rutube:movie'
+ IE_DESC = 'Rutube movies'
+ _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
+ _TESTS = []
+
+ _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
+
+ def _real_extract(self, url):
+ movie_id = self._match_id(url)
+ movie = self._download_json(
+ self._MOVIE_TEMPLATE % movie_id, movie_id,
+ 'Downloading movie JSON')
+ return self._extract_playlist(
+ movie_id, playlist_name=movie.get('name'))
+
+
+class RutubePersonIE(RutubePlaylistBaseIE):
+ IE_NAME = 'rutube:person'
+ IE_DESC = 'Rutube person videos'
+ _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://rutube.ru/video/person/313878/',
+ 'info_dict': {
+ 'id': '313878',
+ },
+ 'playlist_mincount': 37,
+ }]
+
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
+
+
+class RutubePlaylistIE(RutubePlaylistBaseIE):
+ IE_NAME = 'rutube:playlist'
+ IE_DESC = 'Rutube playlists'
+ _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag',
+ 'info_dict': {
+ 'id': '3097',
+ },
+ 'playlist_count': 27,
+ }, {
+ 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
+ 'only_matching': True,
+ }]
+
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json'
+
+ @classmethod
+ def suitable(cls, url):
+ if not super(RutubePlaylistIE, cls).suitable(url):
+ return False
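+ # only handle URLs that carry both a playlist type and a numeric
+ # playlist id; plain video URLs fall through to RutubeIE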
+ params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0])
+
+ def _next_page_url(self, page_num, playlist_id, item_kind):
+ return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num)
+
+ def _real_extract(self, url):
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ playlist_kind = qs['pl_type'][0]
+ playlist_id = qs['pl_id'][0]
+ return self._extract_playlist(playlist_id, item_kind=playlist_kind)
diff --git a/hypervideo_dl/extractor/rutv.py b/hypervideo_dl/extractor/rutv.py
new file mode 100644
index 0000000..d2713c1
--- /dev/null
+++ b/hypervideo_dl/extractor/rutv.py
@@ -0,0 +1,211 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none
+)
+
+
+class RUTVIE(InfoExtractor):
+ IE_DESC = 'RUTV.RU'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:test)?player\.(?:rutv\.ru|vgtrk\.com)/
+ (?P<path>
+ flash\d+v/container\.swf\?id=|
+ iframe/(?P<type>swf|video|live)/id/|
+ index/iframe/cast_id/
+ )
+ (?P<id>\d+)
+ '''
+
+ _TESTS = [
+ {
+ 'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724',
+ 'info_dict': {
+ 'id': '774471',
+ 'ext': 'mp4',
+ 'title': 'Монологи на все времена',
+ 'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
+ 'duration': 2906,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638',
+ 'info_dict': {
+ 'id': '774016',
+ 'ext': 'mp4',
+ 'title': 'Чужой в семье Сталина',
+ 'description': '',
+ 'duration': 2539,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000',
+ 'info_dict': {
+ 'id': '766888',
+ 'ext': 'mp4',
+ 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
+ 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
+ 'duration': 279,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169',
+ 'info_dict': {
+ 'id': '771852',
+ 'ext': 'mp4',
+ 'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
+ 'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
+ 'duration': 3096,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014',
+ 'info_dict': {
+ 'id': '51499',
+ 'ext': 'flv',
+ 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
+ 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
+ },
+ 'skip': 'Broadcast has finished',
+ },
+ {
+ 'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/',
+ 'info_dict': {
+ 'id': '21',
+ 'ext': 'mp4',
+ 'title': 're:^Россия 24. Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/',
+ 'only_matching': True,
+ },
+ ]
+
+ @classmethod
+ def _extract_url(cls, webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ mobj = re.search(
+ r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ video_path = mobj.group('path')
+
+ if re.match(r'flash\d+v', video_path):
+ video_type = 'video'
+ elif video_path.startswith('iframe'):
+ video_type = mobj.group('type')
+ if video_type == 'swf':
+ video_type = 'video'
+ elif video_path.startswith('index/iframe/cast_id'):
+ video_type = 'live'
+
+ is_live = video_type == 'live'
+
+ json_data = self._download_json(
+ 'http://player.rutv.ru/iframe/data%s/id/%s' % ('live' if is_live else 'video', video_id),
+ video_id, 'Downloading JSON')
+
+ if json_data['errors']:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True)
+
+ playlist = json_data['data']['playlist']
+ medialist = playlist['medialist']
+ media = medialist[0]
+
+ if media['errors']:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True)
+
+ view_count = playlist.get('count_views')
+ priority_transport = playlist['priority_transport']
+
+ thumbnail = media['picture']
+ width = int_or_none(media['width'])
+ height = int_or_none(media['height'])
+ description = media['anons']
+ title = media['title']
+ duration = int_or_none(media.get('duration'))
+
+ formats = []
+
+ for transport, links in media['sources'].items():
+ for quality, url in links.items():
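+ # prefer formats delivered over the transport the playlist marks
+ # as primary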
+ preference = -1 if priority_transport == transport else -2
+ if transport == 'rtmp':
+ mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
+ if not mobj:
+ continue
+ fmt = {
+ 'url': mobj.group('url'),
+ 'play_path': mobj.group('playpath'),
+ 'app': mobj.group('app'),
+ 'page_url': 'http://player.rutv.ru',
+ 'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22',
+ 'rtmp_live': True,
+ 'ext': 'flv',
+ 'vbr': int(quality),
+ 'preference': preference,
+ }
+ elif transport == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ url, video_id, 'mp4', preference=preference, m3u8_id='hls'))
+ continue
+ else:
+ fmt = {
+ 'url': url
+ }
+ fmt.update({
+ 'width': width,
+ 'height': height,
+ 'format_id': '%s-%s' % (transport, quality),
+ })
+ formats.append(fmt)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'view_count': view_count,
+ 'duration': duration,
+ 'formats': formats,
+ 'is_live': is_live,
+ }
diff --git a/hypervideo_dl/extractor/ruutu.py b/hypervideo_dl/extractor/ruutu.py
new file mode 100644
index 0000000..c50cd3e
--- /dev/null
+++ b/hypervideo_dl/extractor/ruutu.py
@@ -0,0 +1,227 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ find_xpath_attr,
+ int_or_none,
+ unified_strdate,
+ url_or_none,
+ xpath_attr,
+ xpath_text,
+)
+
+
+class RuutuIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla|audio)/|
+ static\.nelonenmedia\.fi/player/misc/embed_player\.html\?.*?\bnid=
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [
+ {
+ 'url': 'http://www.ruutu.fi/video/2058907',
+ 'md5': 'ab2093f39be1ca8581963451b3c0234f',
+ 'info_dict': {
+ 'id': '2058907',
+ 'ext': 'mp4',
+ 'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
+ 'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 114,
+ 'age_limit': 0,
+ },
+ },
+ {
+ 'url': 'http://www.ruutu.fi/video/2057306',
+ 'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
+ 'info_dict': {
+ 'id': '2057306',
+ 'ext': 'mp4',
+ 'title': 'Superpesis: katso koko kausi Ruudussa',
+ 'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 40,
+ 'age_limit': 0,
+ },
+ },
+ {
+ 'url': 'http://www.supla.fi/supla/2231370',
+ 'md5': 'df14e782d49a2c0df03d3be2a54ef949',
+ 'info_dict': {
+ 'id': '2231370',
+ 'ext': 'mp4',
+ 'title': 'Osa 1: Mikael Jungner',
+ 'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'age_limit': 0,
+ },
+ },
+ # Episode where <SourceFile> is "NOT-USED" but other downloadable
+ # sources are available.
+ {
+ 'url': 'http://www.ruutu.fi/video/3193728',
+ 'only_matching': True,
+ },
+ {
+ # audio podcast
+ 'url': 'https://www.supla.fi/supla/3382410',
+ 'md5': 'b9d7155fed37b2ebf6021d74c4b8e908',
+ 'info_dict': {
+ 'id': '3382410',
+ 'ext': 'mp3',
+ 'title': 'Mikä ihmeen poltergeist?',
+ 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'age_limit': 0,
+ },
+ 'expected_warnings': [
+ 'HTTP Error 502: Bad Gateway',
+ 'Failed to download m3u8 information',
+ ],
+ },
+ {
+ 'url': 'http://www.supla.fi/audio/2231370',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://static.nelonenmedia.fi/player/misc/embed_player.html?nid=3618790',
+ 'only_matching': True,
+ },
+ {
+ # episode
+ 'url': 'https://www.ruutu.fi/video/3401964',
+ 'info_dict': {
+ 'id': '3401964',
+ 'ext': 'mp4',
+ 'title': 'Temptation Island Suomi - Kausi 5 - Jakso 17',
+ 'description': 'md5:87cf01d5e1e88adf0c8a2937d2bd42ba',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2582,
+ 'age_limit': 12,
+ 'upload_date': '20190508',
+ 'series': 'Temptation Island Suomi',
+ 'season_number': 5,
+ 'episode_number': 17,
+ 'categories': ['Reality ja tositapahtumat', 'Kotimaiset suosikit', 'Romantiikka ja parisuhde'],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # premium
+ 'url': 'https://www.ruutu.fi/video/3618715',
+ 'only_matching': True,
+ },
+ ]
+ _API_BASE = 'https://gatling.nelonenmedia.fi'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video_xml = self._download_xml(
+ '%s/media-xml-cache' % self._API_BASE, video_id,
+ query={'id': video_id})
+
+ formats = []
+ processed_urls = []
+
+ def extract_formats(node):
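+ # recursively walk <...Files> container nodes, collecting each
+ # usable <...File> leaf exactly once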
+ for child in node:
+ if child.tag.endswith('Files'):
+ extract_formats(child)
+ elif child.tag.endswith('File'):
+ video_url = child.text
+ if (not video_url or video_url in processed_urls
+ or any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
+ continue
+ processed_urls.append(video_url)
+ ext = determine_ext(video_url)
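+ # try to exchange the raw stream URL for a tokenized one; fall
+ # back to the raw URL if the auth endpoint yields nothing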
+ auth_video_url = url_or_none(self._download_webpage(
+ '%s/auth/access/v2' % self._API_BASE, video_id,
+ note='Downloading authenticated %s stream URL' % ext,
+ fatal=False, query={'stream': video_url}))
+ if auth_video_url:
+ processed_urls.append(auth_video_url)
+ video_url = auth_video_url
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ video_url, video_id, f4m_id='hds', fatal=False))
+ elif ext == 'mpd':
+ # video-only and audio-only streams are of different duration,
+ # resulting in out-of-sync playback; DASH extraction is therefore
+ # skipped and the extend below is intentionally unreachable
+ continue
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ elif ext == 'mp3' or child.tag == 'AudioMediaFile':
+ formats.append({
+ 'format_id': 'audio',
+ 'url': video_url,
+ 'vcodec': 'none',
+ })
+ else:
+ proto = compat_urllib_parse_urlparse(video_url).scheme
+ if not child.tag.startswith('HTTP') and proto != 'rtmp':
+ continue
+ preference = -1 if proto == 'rtmp' else 1
+ label = child.get('label')
+ tbr = int_or_none(child.get('bitrate'))
+ format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto
+ if not self._is_valid_url(video_url, video_id, format_id):
+ continue
+ width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]]
+ formats.append({
+ 'format_id': format_id,
+ 'url': video_url,
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'preference': preference,
+ })
+
+ extract_formats(video_xml.find('./Clip'))
+
+ def pv(name):
+ node = find_xpath_attr(
+ video_xml, './Clip/PassthroughVariables/variable', 'name', name)
+ if node is not None:
+ return node.get('value')
+
+ if not formats:
+ drm = xpath_text(video_xml, './Clip/DRM', default=None)
+ if drm:
+ raise ExtractorError('This video is DRM protected.', expected=True)
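+ # the page embeds the a la carta player URL inside an HTML-escaped JSON
+ # blob, hence the &quot; entities in the pattern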
+ ns_st_cds = pv('ns_st_cds')
+ if ns_st_cds != 'free':
+ raise ExtractorError('This video is %s.' % ns_st_cds, expected=True)
+
+ self._sort_formats(formats)
+
+ themes = pv('themes')
+
+ return {
+ 'id': video_id,
+ 'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True),
+ 'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'),
+ 'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'),
+ 'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')) or int_or_none(pv('runtime')),
+ 'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
+ 'upload_date': unified_strdate(pv('date_start')),
+ 'series': pv('series_name'),
+ 'season_number': int_or_none(pv('season_number')),
+ 'episode_number': int_or_none(pv('episode_number')),
+ 'categories': themes.split(',') if themes else [],
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/ruv.py b/hypervideo_dl/extractor/ruv.py
new file mode 100644
index 0000000..8f3cc40
--- /dev/null
+++ b/hypervideo_dl/extractor/ruv.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ unified_timestamp,
+)
+
+
+class RuvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)'
+ _TESTS = [{
+ # m3u8
+ 'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516',
+ 'md5': '66347652f4e13e71936817102acc1724',
+ 'info_dict': {
+ 'id': '1144499',
+ 'display_id': 'fh-valur/20170516',
+ 'ext': 'mp4',
+ 'title': 'FH - Valur',
+ 'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.',
+ 'timestamp': 1494963600,
+ 'upload_date': '20170516',
+ },
+ }, {
+ # mp3
+ 'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619',
+ 'md5': '395ea250c8a13e5fdb39d4670ef85378',
+ 'info_dict': {
+ 'id': '1153630',
+ 'display_id': 'morgunutvarpid/20170619',
+ 'ext': 'mp3',
+ 'title': 'Morgunútvarpið',
+ 'description': 'md5:a4cf1202c0a1645ca096b06525915418',
+ 'timestamp': 1497855000,
+ 'upload_date': '20170619',
+ },
+ }, {
+ 'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ruv.is/node/1151854',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._og_search_title(webpage)
+
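+ # the embedded player config assigns its fields as: video.<field> = "<value>"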
+ FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'
+
+ media_url = self._html_search_regex(
+ FIELD_RE % 'src', webpage, 'video URL', group='url')
+
+ video_id = self._search_regex(
+ r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)',
+ webpage, 'video id', default=display_id)
+
+ ext = determine_ext(media_url)
+
+ if ext == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ elif ext == 'mp3':
+ formats = [{
+ 'format_id': 'mp3',
+ 'url': media_url,
+ 'vcodec': 'none',
+ }]
+ else:
+ formats = [{
+ 'url': media_url,
+ }]
+
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._search_regex(
+ FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False)
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'article:published_time', webpage, 'timestamp', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/safari.py b/hypervideo_dl/extractor/safari.py
new file mode 100644
index 0000000..2cc6651
--- /dev/null
+++ b/hypervideo_dl/extractor/safari.py
@@ -0,0 +1,264 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ update_url_query,
+)
+
+
+class SafariBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://learning.oreilly.com/accounts/login/'
+ _NETRC_MACHINE = 'safari'
+
+ _API_BASE = 'https://learning.oreilly.com/api/v1'
+ _API_FORMAT = 'json'
+
+ LOGGED_IN = False
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
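+        # Probe login-check/ first: if it lands on learning.oreilly.com/home/
+        # we are already authenticated; otherwise the redirect URL carries a
+        # "next" parameter that the JSON login endpoint below needs.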
+ _, urlh = self._download_webpage_handle(
+ 'https://learning.oreilly.com/accounts/login-check/', None,
+ 'Downloading login page')
+
+ def is_logged(urlh):
+ return 'learning.oreilly.com/home/' in urlh.geturl()
+
+ if is_logged(urlh):
+ self.LOGGED_IN = True
+ return
+
+ redirect_url = urlh.geturl()
+ parsed_url = compat_urlparse.urlparse(redirect_url)
+ qs = compat_parse_qs(parsed_url.query)
+ next_uri = compat_urlparse.urljoin(
+ 'https://api.oreilly.com', qs['next'][0])
+
+ auth, urlh = self._download_json_handle(
+ 'https://www.oreilly.com/member/auth/login/', None, 'Logging in',
+ data=json.dumps({
+ 'email': username,
+ 'password': password,
+ 'redirect_uri': next_uri,
+ }).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'Referer': redirect_url,
+ }, expected_status=400)
+
+ credentials = auth.get('credentials')
+ if (not auth.get('logged_in') and not auth.get('redirect_uri')
+ and credentials):
+ raise ExtractorError(
+ 'Unable to login: %s' % credentials, expected=True)
+
+        # O'Reilly serves two identical instances of the following cookies
+        # in the Set-Cookie header and expects the first one to actually be set
+ for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'):
+ self._apply_first_set_cookie_header(urlh, cookie)
+
+ _, urlh = self._download_webpage_handle(
+            auth.get('redirect_uri') or next_uri, None, 'Completing login')
+
+ if is_logged(urlh):
+ self.LOGGED_IN = True
+ return
+
+ raise ExtractorError('Unable to log in')
+
+
+class SafariIE(SafariBaseIE):
+ IE_NAME = 'safari'
+ IE_DESC = 'safaribooksonline.com online video'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
+ (?:
+ library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
+ videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
+ )
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
+ 'md5': 'dcc5a425e79f2564148652616af1f2a3',
+ 'info_dict': {
+ 'id': '0_qbqx90ic',
+ 'ext': 'mp4',
+ 'title': 'Introduction to Hadoop Fundamentals LiveLessons',
+ 'timestamp': 1437758058,
+ 'upload_date': '20150724',
+ 'uploader_id': 'stork',
+ },
+ }, {
+ # non-digits in course id
+ 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html',
+ 'only_matching': True,
+ }]
+
+ _PARTNER_ID = '1926081'
+ _UICONF_ID = '29375172'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ reference_id = mobj.group('reference_id')
+ if reference_id:
+ video_id = reference_id
+ partner_id = self._PARTNER_ID
+ ui_id = self._UICONF_ID
+ else:
+ video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part'))
+
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ mobj = re.match(self._VALID_URL, urlh.geturl())
+ reference_id = mobj.group('reference_id')
+ if not reference_id:
+ reference_id = self._search_regex(
+ r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'kaltura reference id', group='id')
+ partner_id = self._search_regex(
+ r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'kaltura widget id', default=self._PARTNER_ID,
+ group='id')
+ ui_id = self._search_regex(
+ r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'kaltura uiconf id', default=self._UICONF_ID,
+ group='id')
+
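+        # Build the Kaltura embed query; when logged in, a Kaltura session
+        # token (flashvars[ks]) is attached below.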
+ query = {
+ 'wid': '_%s' % partner_id,
+ 'uiconf_id': ui_id,
+ 'flashvars[referenceId]': reference_id,
+ }
+
+ if self.LOGGED_IN:
+ kaltura_session = self._download_json(
+ '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
+ video_id, 'Downloading kaltura session JSON',
+ 'Unable to download kaltura session JSON', fatal=False,
+ headers={'Accept': 'application/json'})
+ if kaltura_session:
+ session = kaltura_session.get('session')
+ if session:
+ query['flashvars[ks]'] = session
+
+ return self.url_result(update_url_query(
+ 'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query),
+ 'Kaltura')
+
+
+class SafariApiIE(SafariBaseIE):
+ IE_NAME = 'safari:api'
+ _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ part = self._download_json(
+ url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')),
+ 'Downloading part JSON')
+ return self.url_result(part['web_url'], SafariIE.ie_key())
+
+
+class SafariCourseIE(SafariBaseIE):
+ IE_NAME = 'safari:course'
+ IE_DESC = 'safaribooksonline.com online courses'
+
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
+ (?:
+ library/view/[^/]+|
+ api/v1/book|
+ videos/[^/]+
+ )|
+ techbus\.safaribooksonline\.com
+ )
+ /(?P<id>[^/]+)
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+ 'info_dict': {
+ 'id': '9780133392838',
+ 'title': 'Hadoop Fundamentals LiveLessons',
+ },
+ 'playlist_count': 22,
+ 'skip': 'Requires safaribooksonline account credentials',
+ }, {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://techbus.safaribooksonline.com/9780134426365',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url)
+ else super(SafariCourseIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ course_json = self._download_json(
+ '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
+ course_id, 'Downloading course JSON')
+
+ if 'chapters' not in course_json:
+ raise ExtractorError(
+ 'No chapters found for course %s' % course_id, expected=True)
+
+ entries = [
+ self.url_result(chapter, SafariApiIE.ie_key())
+ for chapter in course_json['chapters']]
+
+ course_title = course_json['title']
+
+ return self.playlist_result(entries, course_id, course_title)
diff --git a/hypervideo_dl/extractor/samplefocus.py b/hypervideo_dl/extractor/samplefocus.py
new file mode 100644
index 0000000..806c3c3
--- /dev/null
+++ b/hypervideo_dl/extractor/samplefocus.py
@@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ get_element_by_attribute,
+ int_or_none,
+)
+
+
+class SampleFocusIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar',
+ 'md5': '48c8d62d60be467293912e0e619a5120',
+ 'info_dict': {
+ 'id': '40316',
+ 'display_id': 'lil-peep-sad-emo-guitar',
+ 'ext': 'mp3',
+ 'title': 'Lil Peep Sad Emo Guitar',
+ 'thumbnail': r're:^https?://.+\.png',
+ 'license': 'Standard License',
+ 'uploader': 'CapsCtrl',
+ 'uploader_id': 'capsctrl',
+ 'like_count': int,
+ 'comment_count': int,
+ 'categories': ['Samples', 'Guitar', 'Electric guitar'],
+ },
+ }, {
+ 'url': 'https://samplefocus.com/samples/dababy-style-bass-808',
+ 'only_matching': True
+ }, {
+ 'url': 'https://samplefocus.com/samples/young-chop-kick',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ sample_id = self._search_regex(
+ r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
+ webpage, 'sample id', group='id')
+
+ title = self._og_search_title(webpage, fatal=False) or self._html_search_regex(
+ r'<h1>(.+?)</h1>', webpage, 'title')
+
+ mp3_url = self._search_regex(
+ r'<input[^>]+id=(["\'])sample_mp3\1[^>]+value=(["\'])(?P<url>(?:(?!\2).)+)',
+ webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex(
+ r'<meta[^>]+itemprop=(["\'])contentUrl\1[^>]*>',
+ webpage, 'mp3 url', group=0))['content']
+
+ thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex(
+ r'<img[^>]+class=(?:["\'])waveform responsive-img[^>]+src=(["\'])(?P<url>(?:(?!\1).)+)',
+            webpage, 'thumbnail', fatal=False, group='url')
+
+ comments = []
+ for author_id, author, body in re.findall(r'(?s)<p[^>]+class="comment-author"><a[^>]+href="/users/([^"]+)">([^"]+)</a>.+?<p[^>]+class="comment-body">([^>]+)</p>', webpage):
+ comments.append({
+ 'author': author,
+ 'author_id': author_id,
+ 'text': body,
+ })
+
+ uploader_id = uploader = None
+ mobj = re.search(r'>By <a[^>]+href="/users/([^"]+)"[^>]*>([^<]+)', webpage)
+ if mobj:
+ uploader_id, uploader = mobj.groups()
+
+ breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage)
+ categories = []
+ if breadcrumb:
+ for _, name in re.findall(r'<span[^>]+property=(["\'])name\1[^>]*>([^<]+)', breadcrumb):
+ categories.append(name)
+
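+        # Like/comment counts are rendered as <span class="...-count">N</span>
+        # elements.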
+ def extract_count(klass):
+ return int_or_none(self._html_search_regex(
+ r'<span[^>]+class=(?:["\'])?%s-count[^>]*>(\d+)' % klass,
+ webpage, klass, fatal=False))
+
+ return {
+ 'id': sample_id,
+ 'title': title,
+ 'url': mp3_url,
+ 'display_id': display_id,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'license': self._html_search_regex(
+ r'<a[^>]+href=(["\'])/license\1[^>]*>(?P<license>[^<]+)<',
+ webpage, 'license', fatal=False, group='license'),
+ 'uploader_id': uploader_id,
+ 'like_count': extract_count('sample-%s-favorites' % sample_id),
+ 'comment_count': extract_count('comments'),
+ 'comments': comments,
+ 'categories': categories,
+ }
diff --git a/hypervideo_dl/extractor/sapo.py b/hypervideo_dl/extractor/sapo.py
new file mode 100644
index 0000000..49a9b31
--- /dev/null
+++ b/hypervideo_dl/extractor/sapo.py
@@ -0,0 +1,119 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ unified_strdate,
+)
+
+
+class SapoIE(InfoExtractor):
+ IE_DESC = 'SAPO Vídeos'
+ _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})'
+
+ _TESTS = [
+ {
+ 'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi',
+ 'md5': '79ee523f6ecb9233ac25075dee0eda83',
+ 'note': 'SD video',
+ 'info_dict': {
+ 'id': 'UBz95kOtiWYUMTA5Ghfi',
+ 'ext': 'mp4',
+ 'title': 'Benfica - Marcas na Hitória',
+ 'description': 'md5:c9082000a128c3fd57bf0299e1367f22',
+ 'duration': 264,
+ 'uploader': 'tiago_1988',
+ 'upload_date': '20080229',
+ 'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'],
+ },
+ },
+ {
+ 'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF',
+ 'md5': '90a2f283cfb49193fe06e861613a72aa',
+ 'note': 'HD video',
+ 'info_dict': {
+ 'id': 'IyusNAZ791ZdoCY5H5IF',
+ 'ext': 'mp4',
+ 'title': 'Codebits VII - Report',
+ 'description': 'md5:6448d6fd81ce86feac05321f354dbdc8',
+ 'duration': 144,
+ 'uploader': 'codebits',
+ 'upload_date': '20140427',
+ 'categories': ['codebits', 'codebits2014'],
+ },
+ },
+ {
+ 'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz',
+ 'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac',
+ 'note': 'v2 video',
+ 'info_dict': {
+ 'id': 'yLqjzPtbTimsn2wWBKHz',
+ 'ext': 'mp4',
+ 'title': 'Hipnose Condicionativa 4',
+ 'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40',
+ 'duration': 692,
+ 'uploader': 'sapozen',
+ 'upload_date': '20090609',
+ 'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'],
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ item = self._download_xml(
+ 'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id).find('./channel/item')
+
+ title = item.find('./title').text
+ description = item.find('./{http://videos.sapo.pt/mrss/}synopse').text
+ thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url')
+ duration = parse_duration(item.find('./{http://videos.sapo.pt/mrss/}time').text)
+ uploader = item.find('./{http://videos.sapo.pt/mrss/}author').text
+ upload_date = unified_strdate(item.find('./pubDate').text)
+ view_count = int(item.find('./{http://videos.sapo.pt/mrss/}views').text)
+ comment_count = int(item.find('./{http://videos.sapo.pt/mrss/}comment_count').text)
+ tags = item.find('./{http://videos.sapo.pt/mrss/}tags').text
+ categories = tags.split() if tags else []
+ age_limit = 18 if item.find('./{http://videos.sapo.pt/mrss/}m18').text == 'true' else 0
+
+ video_url = item.find('./{http://videos.sapo.pt/mrss/}videoFile').text
+ video_size = item.find('./{http://videos.sapo.pt/mrss/}videoSize').text.split('x')
+
+ formats = [{
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'format_id': 'sd',
+ 'width': int(video_size[0]),
+ 'height': int(video_size[1]),
+ }]
+
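+        # When an HD rendition is flagged, it lives at the same URL with the
+        # trailing /mov/1 segment replaced by /mov/39 (1280x720).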
+ if item.find('./{http://videos.sapo.pt/mrss/}HD').text == 'true':
+ formats.append({
+ 'url': re.sub(r'/mov/1$', '/mov/39', video_url),
+ 'ext': 'mp4',
+ 'format_id': 'hd',
+ 'width': 1280,
+ 'height': 720,
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/savefrom.py b/hypervideo_dl/extractor/savefrom.py
new file mode 100644
index 0000000..21e44b6
--- /dev/null
+++ b/hypervideo_dl/extractor/savefrom.py
@@ -0,0 +1,34 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import os.path
+import re
+
+from .common import InfoExtractor
+
+
+class SaveFromIE(InfoExtractor):
+ IE_NAME = 'savefrom.net'
+ _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$'
+
+ _TEST = {
+ 'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com',
+ 'info_dict': {
+ 'id': 'UlVRAPW2WJY',
+ 'ext': 'mp4',
+ 'title': 'About Team Radical MMA | MMA Fighting',
+ 'upload_date': '20120816',
+ 'uploader': 'Howcast',
+ 'uploader_id': 'Howcast',
+ 'description': r're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*',
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = os.path.splitext(url.split('/')[-1])[0]
+
+ return self.url_result(mobj.group('url'), video_id=video_id)
diff --git a/hypervideo_dl/extractor/sbs.py b/hypervideo_dl/extractor/sbs.py
new file mode 100644
index 0000000..0a806ee
--- /dev/null
+++ b/hypervideo_dl/extractor/sbs.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ smuggle_url,
+ ExtractorError,
+)
+
+
+class SBSIE(InfoExtractor):
+ IE_DESC = 'sbs.com.au'
+ _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=|/watch/)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'
+
+ _TESTS = [{
+ # Original URL is handled by the generic IE which finds the iframe:
+ # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
+ 'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
+ 'md5': '3150cf278965eeabb5b4cea1c963fe0a',
+ 'info_dict': {
+ 'id': '_rFBPRPO4pMR',
+ 'ext': 'mp4',
+ 'title': 'Dingo Conservation (The Feed)',
+ 'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'duration': 308,
+ 'timestamp': 1408613220,
+ 'upload_date': '20140821',
+ 'uploader': 'SBSC',
+ },
+ }, {
+ 'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/ondemand/?play=1836638787723',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/ondemand/program/inside-windsor-castle?play=1283505731842',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/ondemand/watch/1698704451971',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ player_params = self._download_json(
+ 'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id)
+
+ error = player_params.get('error')
+ if error:
+            error_message = 'Sorry, the video you are looking for does not exist.'
+ video_data = error.get('results') or {}
+ error_code = error.get('errorCode')
+ if error_code == 'ComingSoon':
+ error_message = '%s is not yet available.' % video_data.get('title', '')
+ elif error_code in ('Forbidden', 'intranetAccessOnly'):
+                error_message = 'Sorry, this video cannot be accessed via this website.'
+ elif error_code == 'Expired':
+ error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '')
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
+
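+        # Prefer the progressive release URL, then the html/standard ones,
+        # falling back to the related items URL.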
+ urls = player_params['releaseUrls']
+ theplatform_url = (urls.get('progressive') or urls.get('html')
+ or urls.get('standard') or player_params['relatedItemsURL'])
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'id': video_id,
+ 'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}),
+ }
diff --git a/hypervideo_dl/extractor/screencast.py b/hypervideo_dl/extractor/screencast.py
new file mode 100644
index 0000000..69a0d01
--- /dev/null
+++ b/hypervideo_dl/extractor/screencast.py
@@ -0,0 +1,123 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_request,
+)
+from ..utils import (
+ ExtractorError,
+)
+
+
+class ScreencastIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.screencast.com/t/3ZEjQXlT',
+ 'md5': '917df1c13798a3e96211dd1561fded83',
+ 'info_dict': {
+ 'id': '3ZEjQXlT',
+ 'ext': 'm4v',
+ 'title': 'Color Measurement with Ocean Optics Spectrometers',
+ 'description': 'md5:240369cde69d8bed61349a199c5fb153',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
+ }
+ }, {
+ 'url': 'http://www.screencast.com/t/V2uXehPJa1ZI',
+ 'md5': 'e8e4b375a7660a9e7e35c33973410d34',
+ 'info_dict': {
+ 'id': 'V2uXehPJa1ZI',
+ 'ext': 'mov',
+ 'title': 'The Amadeus Spectrometer',
+ 'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
+ }
+ }, {
+ 'url': 'http://www.screencast.com/t/aAB3iowa',
+ 'md5': 'dedb2734ed00c9755761ccaee88527cd',
+ 'info_dict': {
+ 'id': 'aAB3iowa',
+ 'ext': 'mp4',
+ 'title': 'Google Earth Export',
+ 'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
+ }
+ }, {
+ 'url': 'http://www.screencast.com/t/X3ddTrYh',
+ 'md5': '669ee55ff9c51988b4ebc0877cc8b159',
+ 'info_dict': {
+ 'id': 'X3ddTrYh',
+ 'ext': 'wmv',
+ 'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression',
+ 'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
+ 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
+ }
+ }, {
+ 'url': 'http://screencast.com/t/aAB3iowa',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
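+        # The clip may be embedded in several ways; try them in order:
+        # QuickTime <embed>, Flash vars, og:video meta, MediaContentUrl JS.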
+ video_url = self._html_search_regex(
+ r'<embed name="Video".*?src="([^"]+)"', webpage,
+ 'QuickTime embed', default=None)
+
+ if video_url is None:
+ flash_vars_s = self._html_search_regex(
+ r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars',
+ default=None)
+ if not flash_vars_s:
+ flash_vars_s = self._html_search_regex(
+ r'<param name="initParams" value="([^"]+)"', webpage, 'flash vars',
+ default=None)
+ if flash_vars_s:
+ flash_vars_s = flash_vars_s.replace(',', '&')
+ if flash_vars_s:
+ flash_vars = compat_parse_qs(flash_vars_s)
+ video_url_raw = compat_urllib_request.quote(
+ flash_vars['content'][0])
+ video_url = video_url_raw.replace('http%3A', 'http:')
+
+ if video_url is None:
+ video_meta = self._html_search_meta(
+ 'og:video', webpage, default=None)
+ if video_meta:
+ video_url = self._search_regex(
+ r'src=(.*?)(?:$|&)', video_meta,
+ 'meta tag video URL', default=None)
+
+ if video_url is None:
+ video_url = self._html_search_regex(
+ r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'video url', default=None, group='url')
+
+ if video_url is None:
+ video_url = self._html_search_meta(
+ 'og:video', webpage, default=None)
+
+ if video_url is None:
+ raise ExtractorError('Cannot find video')
+
+ title = self._og_search_title(webpage, default=None)
+ if title is None:
+ title = self._html_search_regex(
+ [r'<b>Title:</b> ([^<]+)</div>',
+ r'class="tabSeperator">></span><span class="tabText">(.+?)<',
+ r'<title>([^<]+)</title>'],
+ webpage, 'title')
+ thumbnail = self._og_search_thumbnail(webpage)
+ description = self._og_search_description(webpage, default=None)
+ if description is None:
+ description = self._html_search_meta('description', webpage)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/screencastomatic.py b/hypervideo_dl/extractor/screencastomatic.py
new file mode 100644
index 0000000..0afdc17
--- /dev/null
+++ b/hypervideo_dl/extractor/screencastomatic.py
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ get_element_by_class,
+ int_or_none,
+ remove_start,
+ strip_or_none,
+ unified_strdate,
+)
+
+
+class ScreencastOMaticIE(InfoExtractor):
+ _VALID_URL = r'https?://screencast-o-matic\.com/(?:(?:watch|player)/|embed\?.*?\bsc=)(?P<id>[0-9a-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
+ 'md5': '483583cb80d92588f15ccbedd90f0c18',
+ 'info_dict': {
+ 'id': 'c2lD3BeOPl',
+ 'ext': 'mp4',
+ 'title': 'Welcome to 3-4 Philosophy @ DECV!',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
+ 'duration': 369,
+ 'upload_date': '20141216',
+ }
+ }, {
+ 'url': 'http://screencast-o-matic.com/player/c2lD3BeOPl',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://screencast-o-matic.com/embed?ff=true&sc=cbV2r4Q5TL&fromPH=true&a=1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'https://screencast-o-matic.com/player/' + video_id, video_id)
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+ info.update({
+ 'id': video_id,
+ 'title': get_element_by_class('overlayTitle', webpage),
+ 'description': strip_or_none(get_element_by_class('overlayDescription', webpage)) or None,
+ 'duration': int_or_none(self._search_regex(
+ r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};',
+ webpage, 'duration', default=None)),
+ 'upload_date': unified_strdate(remove_start(
+ get_element_by_class('overlayPublished', webpage), 'Published: ')),
+ })
+ return info
diff --git a/hypervideo_dl/extractor/scrippsnetworks.py b/hypervideo_dl/extractor/scrippsnetworks.py
new file mode 100644
index 0000000..b40b4c4
--- /dev/null
+++ b/hypervideo_dl/extractor/scrippsnetworks.py
@@ -0,0 +1,152 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import hashlib
+import re
+
+from .aws import AWSIE
+from .anvato import AnvatoIE
+from .common import InfoExtractor
+from ..utils import (
+ smuggle_url,
+ urlencode_postdata,
+ xpath_text,
+)
+
+
+class ScrippsNetworksWatchIE(AWSIE):
+ IE_NAME = 'scrippsnetworks:watch'
+ _VALID_URL = r'''(?x)
+ https?://
+ watch\.
+ (?P<site>geniuskitchen)\.com/
+ (?:
+ player\.[A-Z0-9]+\.html\#|
+ show/(?:[^/]+/){2}|
+ player/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
+ 'info_dict': {
+ 'id': '4194875',
+ 'ext': 'mp4',
+ 'title': 'Ample Hills Ice Cream Bike',
+ 'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
+ 'uploader': 'ANV',
+ 'upload_date': '20171011',
+ 'timestamp': 1507698000,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [AnvatoIE.ie_key()],
+ }]
+
+ _SNI_TABLE = {
+ 'geniuskitchen': 'genius',
+ }
+
+ _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
+ _AWS_PROXY_HOST = 'web.api.video.snidigital.com'
+
+ _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ site_id, video_id = mobj.group('site', 'id')
+
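+        # Auth dance: fetch an OpenID token from Cognito, exchange it via STS
+        # AssumeRoleWithWebIdentity for temporary credentials, then use those
+        # to sign the SNI API call that yields the Anvato mcpId.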
+ aws_identity_id_json = json.dumps({
+ 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION
+ }).encode('utf-8')
+ token = self._download_json(
+ 'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id,
+ data=aws_identity_id_json,
+ headers={
+ 'Accept': '*/*',
+ 'Content-Type': 'application/x-amz-json-1.1',
+ 'Referer': url,
+ 'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(),
+ 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
+ 'X-Amz-User-Agent': self._AWS_USER_AGENT,
+ })['Token']
+
+ sts = self._download_xml(
+ 'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
+ 'Action': 'AssumeRoleWithWebIdentity',
+ 'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
+ 'RoleSessionName': 'web-identity',
+ 'Version': '2011-06-15',
+ 'WebIdentityToken': token,
+ }), headers={
+ 'Referer': url,
+ 'X-Amz-User-Agent': self._AWS_USER_AGENT,
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
+ })
+
+ def get(key):
+ return xpath_text(
+ sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
+ fatal=True)
+
+ mcp_id = self._aws_execute_api({
+ 'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id),
+ 'access_key': get('AccessKeyId'),
+ 'secret_key': get('SecretAccessKey'),
+ 'session_token': get('SessionToken'),
+ }, video_id)['results'][0]['mcpId']
+
+ return self.url_result(
+ smuggle_url(
+ 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
+ {'geo_countries': ['US']}),
+ AnvatoIE.ie_key(), video_id=mcp_id)
+
+
+class ScrippsNetworksIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>cookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338',
+ 'info_dict': {
+ 'id': '0260338',
+ 'ext': 'mp4',
+ 'title': 'The Best of the Best',
+ 'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.',
+ 'timestamp': 1475678834,
+ 'upload_date': '20161005',
+ 'uploader': 'SCNI-SCND',
+ },
+ 'add_ie': ['ThePlatform'],
+ }, {
+ 'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368',
+ 'only_matching': True,
+ }]
+ _ACCOUNT_MAP = {
+ 'cookingchanneltv': 2433005105,
+ 'discovery': 2706091867,
+ 'diynetwork': 2433004575,
+ 'foodnetwork': 2433005105,
+ 'hgtv': 2433004575,
+ 'travelchannel': 2433005739,
+ }
+ _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true'
+
+ def _real_extract(self, url):
+ site, guid = re.match(self._VALID_URL, url).groups()
+ return self.url_result(smuggle_url(
+ self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid),
+ {'force_smil_url': True}), 'ThePlatform', guid)
diff --git a/hypervideo_dl/extractor/scte.py b/hypervideo_dl/extractor/scte.py
new file mode 100644
index 0000000..ca1de63
--- /dev/null
+++ b/hypervideo_dl/extractor/scte.py
@@ -0,0 +1,144 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ decode_packed_codes,
+ ExtractorError,
+ urlencode_postdata,
+)
+
+
+class SCTEBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
+ _NETRC_MACHINE = 'scte'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_popup = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login popup')
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'class=["\']welcome\b', r'>Sign Out<'))
+
+ # already logged in
+ if is_logged(login_popup):
+ return
+
+ login_form = self._hidden_inputs(login_popup)
+
+ login_form.update({
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username,
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password,
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on',
+ })
+
+ response = self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form))
+
+ if '|pageRedirect|' not in response and not is_logged(response):
+ error = self._html_search_regex(
+ r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class SCTEIE(SCTEBaseIE):
+    _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php\?.*?\bid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
+ 'info_dict': {
+ 'title': 'Introduction to DOCSIS Engineering Professional',
+ 'id': '31484',
+ },
+ 'playlist_count': 5,
+ 'skip': 'Requires account credentials',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+
+        context_id = self._search_regex(r'context-(\d+)', webpage, 'context id')
+ content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id
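+        # data.js is packed with the JS P.A.C.K.E.R. obfuscator; once decoded,
+        # the CreateData("...") argument is an XML manifest of course assets.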
+ context = decode_packed_codes(self._download_webpage(
+ '%smobile/data.js' % content_base, video_id))
+
+ data = self._parse_xml(
+ self._search_regex(
+ r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
+ video_id)
+
+ entries = []
+ for asset in data.findall('.//asset'):
+ asset_url = asset.get('url')
+ if not asset_url or not asset_url.endswith('.mp4'):
+ continue
+ asset_id = self._search_regex(
+ r'video_([^_]+)_', asset_url, 'asset id', default=None)
+ if not asset_id:
+ continue
+ entries.append({
+ 'id': asset_id,
+ 'title': title,
+ 'url': content_base + asset_url,
+ })
+
+ return self.playlist_result(entries, video_id, title)
+
+
+class SCTECourseIE(SCTEBaseIE):
+    _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php\?.*?\bid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.scte.org/course/view.php?id=3639',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.scte.org/course/view.php?id=3073',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, course_id)
+
+ title = self._search_regex(
+ r'<h1>(.+?)</h1>', webpage, 'title', default=None)
+
+ entries = []
+ for mobj in re.finditer(
+ r'''(?x)
+ <a[^>]+
+ href=(["\'])
+ (?P<url>
+ https?://learning\.scte\.org/mod/
+                    (?P<kind>scorm|subcourse)/view\.php\?(?:(?!\1).)*?
+ \bid=\d+
+ )
+ ''',
+ webpage):
+ item_url = mobj.group('url')
+ if item_url == url:
+ continue
+ ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
+ else SCTECourseIE.ie_key())
+ entries.append(self.url_result(item_url, ie=ie))
+
+ return self.playlist_result(entries, course_id, title)
diff --git a/hypervideo_dl/extractor/seeker.py b/hypervideo_dl/extractor/seeker.py
new file mode 100644
index 0000000..7872dc8
--- /dev/null
+++ b/hypervideo_dl/extractor/seeker.py
@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ get_element_by_class,
+ strip_or_none,
+)
+
+
+class SeekerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
+ 'md5': '897d44bbe0d8986a2ead96de565a92db',
+ 'info_dict': {
+ 'id': 'Elrn3gnY',
+ 'ext': 'mp4',
+ 'title': 'Should Trump Be Required To Release His Tax Returns?',
+ 'description': 'md5:41efa8cfa8d627841045eec7b018eb45',
+ 'timestamp': 1490090165,
+ 'upload_date': '20170321',
+ }
+ }, {
+ 'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
+ 'playlist': [
+ {
+ 'md5': '0497b9f20495174be73ae136949707d2',
+ 'info_dict': {
+ 'id': 'FihYQ8AE',
+ 'ext': 'mp4',
+ 'title': 'The Pros & Cons Of Zoos',
+ 'description': 'md5:d88f99a8ea8e7d25e6ff77f271b1271c',
+ 'timestamp': 1490039133,
+ 'upload_date': '20170320',
+ },
+ }
+ ],
+ 'info_dict': {
+ 'id': '1834116536',
+ 'title': 'After Gorilla Killing, Changes Ahead for Zoos',
+ 'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id, article_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, display_id)
+ entries = []
+ for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage):
+ entries.append(self.url_result(
+ 'jwplatform:' + jwp_id, 'JWPlatform', jwp_id))
+ return self.playlist_result(
+ entries, article_id,
+ self._og_search_title(webpage),
+ strip_or_none(get_element_by_class('subtitle__text', webpage)) or self._og_search_description(webpage))
diff --git a/hypervideo_dl/extractor/senateisvp.py b/hypervideo_dl/extractor/senateisvp.py
new file mode 100644
index 0000000..db5ef8b
--- /dev/null
+++ b/hypervideo_dl/extractor/senateisvp.py
@@ -0,0 +1,153 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ unsmuggle_url,
+)
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+
+
+class SenateISVPIE(InfoExtractor):
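+    # Maps committee slug -> [Akamai stream number, CDN domain].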
+ _COMM_MAP = [
+ ['ag', '76440', 'http://ag-f.akamaihd.net'],
+ ['aging', '76442', 'http://aging-f.akamaihd.net'],
+ ['approps', '76441', 'http://approps-f.akamaihd.net'],
+ ['armed', '76445', 'http://armed-f.akamaihd.net'],
+ ['banking', '76446', 'http://banking-f.akamaihd.net'],
+ ['budget', '76447', 'http://budget-f.akamaihd.net'],
+ ['cecc', '76486', 'http://srs-f.akamaihd.net'],
+ ['commerce', '80177', 'http://commerce1-f.akamaihd.net'],
+ ['csce', '75229', 'http://srs-f.akamaihd.net'],
+ ['dpc', '76590', 'http://dpc-f.akamaihd.net'],
+ ['energy', '76448', 'http://energy-f.akamaihd.net'],
+ ['epw', '76478', 'http://epw-f.akamaihd.net'],
+ ['ethics', '76449', 'http://ethics-f.akamaihd.net'],
+ ['finance', '76450', 'http://finance-f.akamaihd.net'],
+ ['foreign', '76451', 'http://foreign-f.akamaihd.net'],
+ ['govtaff', '76453', 'http://govtaff-f.akamaihd.net'],
+ ['help', '76452', 'http://help-f.akamaihd.net'],
+ ['indian', '76455', 'http://indian-f.akamaihd.net'],
+ ['intel', '76456', 'http://intel-f.akamaihd.net'],
+ ['intlnarc', '76457', 'http://intlnarc-f.akamaihd.net'],
+ ['jccic', '85180', 'http://jccic-f.akamaihd.net'],
+ ['jec', '76458', 'http://jec-f.akamaihd.net'],
+ ['judiciary', '76459', 'http://judiciary-f.akamaihd.net'],
+ ['rpc', '76591', 'http://rpc-f.akamaihd.net'],
+ ['rules', '76460', 'http://rules-f.akamaihd.net'],
+ ['saa', '76489', 'http://srs-f.akamaihd.net'],
+ ['smbiz', '76461', 'http://smbiz-f.akamaihd.net'],
+ ['srs', '75229', 'http://srs-f.akamaihd.net'],
+ ['uscc', '76487', 'http://srs-f.akamaihd.net'],
+ ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
+ ['arch', '', 'http://ussenate-f.akamaihd.net/']
+ ]
+    IE_NAME = 'senate.gov'
+ _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
+ _TESTS = [{
+ 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
+ 'info_dict': {
+ 'id': 'judiciary031715',
+ 'ext': 'mp4',
+ 'title': 'Integrated Senate Video Player',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
+ 'info_dict': {
+ 'id': 'commerce011514',
+ 'ext': 'mp4',
+ 'title': 'Integrated Senate Video Player'
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
+ # checksum differs each time
+ 'info_dict': {
+ 'id': 'intel090613',
+ 'ext': 'mp4',
+ 'title': 'Integrated Senate Video Player'
+ }
+ }, {
+ # From http://www.c-span.org/video/?96791-1
+ 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _search_iframe_url(webpage):
+ mobj = re.search(
+ r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]",
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _get_info_for_comm(self, committee):
+ for entry in self._COMM_MAP:
+ if entry[0] == committee:
+ return entry[1:]
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs'))
+ if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
+ raise ExtractorError('Invalid URL', expected=True)
+
+        video_id = re.sub(r'\.mp4$', '', qs['filename'][0])
+
+ webpage = self._download_webpage(url, video_id)
+
+ if smuggled_data.get('force_title'):
+ title = smuggled_data['force_title']
+ else:
+            title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+ poster = qs.get('poster')
+ thumbnail = poster[0] if poster else None
+
+ video_type = qs['type'][0]
+ committee = video_type if video_type == 'arch' else qs['comm'][0]
+ stream_num, domain = self._get_info_for_comm(committee)
+
+ formats = []
+ if video_type == 'arch':
+ filename = video_id if '.' in video_id else video_id + '.mp4'
+ formats = [{
+ # All parameters in the query string are necessary to prevent a 403 error
+ 'url': compat_urlparse.urljoin(domain, filename) + '?v=3.1.0&fp=&r=&g=',
+ }]
+ else:
+ hdcore_sign = 'hdcore=3.1.0'
+ url_params = (domain, video_id, stream_num)
+ f4m_url = '%s/z/%s_1@%s/manifest.f4m?' % url_params + hdcore_sign
+ m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params
+ for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
+                # URLs without the extra param result in a 404 error
+ entry.update({'extra_param_to_segment_url': hdcore_sign})
+ formats.append(entry)
+ for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'):
+                mobj = re.search(r'(?P<tag>(?:-p|-b))\.m3u8', entry['url'])
+ if mobj:
+ entry['format_id'] += mobj.group('tag')
+ formats.append(entry)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/sendtonews.py b/hypervideo_dl/extractor/sendtonews.py
new file mode 100644
index 0000000..9d96529
--- /dev/null
+++ b/hypervideo_dl/extractor/sendtonews.py
@@ -0,0 +1,105 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ parse_iso8601,
+ update_url_query,
+ int_or_none,
+ determine_protocol,
+ unescapeHTML,
+)
+
+
+class SendtoNewsIE(InfoExtractor):
+ _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'
+
+ _TEST = {
+ # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
+ 'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES',
+ 'info_dict': {
+ 'id': 'GxfCe0Zo7D-175909-5588'
+ },
+ 'playlist_count': 8,
+ # test the first video only to prevent lengthy tests
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '240385',
+ 'ext': 'mp4',
+ 'title': 'Indians introduce Encarnacion',
+ 'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland',
+ 'duration': 137.898,
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'upload_date': '20170105',
+ 'timestamp': 1483649762,
+ },
+ }],
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ _URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s'
+
+ @classmethod
+ def _extract_url(cls, webpage):
+ mobj = re.search(r'''(?x)<script[^>]+src=([\'"])
+ (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
+ .*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
+ \1>''', webpage)
+ if mobj:
+ sc = mobj.group('SC')
+ return cls._URL_TEMPLATE % sc
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ data_url = update_url_query(
+ url.replace('embedplayer.php', 'data_read.php'),
+ {'cmd': 'loadInitial'})
+ playlist_data = self._download_json(data_url, playlist_id)
+
+ entries = []
+ for video in playlist_data['playlistData'][0]:
+ info_dict = self._parse_jwplayer_data(
+ video['jwconfiguration'],
+ require_title=False, m3u8_id='hls', rtmp_params={'no_resume': True})
+
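+            # The JWPlayer data often lacks a bitrate; recover it from the
+            # '/<kbps>k/' segment of the format URL when missing.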
+ for f in info_dict['formats']:
+ if f.get('tbr'):
+ continue
+ tbr = int_or_none(self._search_regex(
+ r'/(\d+)k/', f['url'], 'bitrate', default=None))
+ if not tbr:
+ continue
+ f.update({
+ 'format_id': '%s-%d' % (determine_protocol(f), tbr),
+ 'tbr': tbr,
+ })
+ self._sort_formats(info_dict['formats'], ('tbr', 'height', 'width', 'format_id'))
+
+ thumbnails = []
+ if video.get('thumbnailUrl'):
+ thumbnails.append({
+ 'id': 'normal',
+ 'url': video['thumbnailUrl'],
+ })
+ if video.get('smThumbnailUrl'):
+ thumbnails.append({
+ 'id': 'small',
+ 'url': video['smThumbnailUrl'],
+ })
+ info_dict.update({
+ 'title': video['S_headLine'].strip(),
+ 'description': unescapeHTML(video.get('S_fullStory')),
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(video.get('SM_length')),
+ 'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
+ })
+ entries.append(info_dict)
+
+ return self.playlist_result(entries, playlist_id)
diff --git a/hypervideo_dl/extractor/servus.py b/hypervideo_dl/extractor/servus.py
new file mode 100644
index 0000000..1610ddc
--- /dev/null
+++ b/hypervideo_dl/extractor/servus.py
@@ -0,0 +1,148 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ unified_timestamp,
+ urlencode_postdata,
+ url_or_none,
+)
+
+
+class ServusIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
+ (?:servustv|pm-wissen)\.com/videos
+ )
+ /(?P<id>[aA]{2}-\w+|\d+-\d+)
+ '''
+ _TESTS = [{
+ # new URL schema
+ 'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
+ 'md5': '60474d4c21f3eb148838f215c37f02b9',
+ 'info_dict': {
+ 'id': 'AA-1T6VBU5PW1W12',
+ 'ext': 'mp4',
+ 'title': 'Die Grünen aus Sicht des Volkes',
+ 'alt_title': 'Talk im Hangar-7 Voxpops Gruene',
+ 'description': 'md5:1247204d85783afe3682644398ff2ec4',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 62.442,
+ 'timestamp': 1605193976,
+ 'upload_date': '20201112',
+ 'series': 'Talk im Hangar-7',
+ 'season': 'Season 9',
+ 'season_number': 9,
+ 'episode': 'Episode 31 - September 14',
+ 'episode_number': 31,
+ }
+ }, {
+ # old URL schema
+ 'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pm-wissen.com/videos/aa-24mus4g2w2112/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).upper()
+
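+        # Obtain an OAuth2 access token via the client_credentials grant;
+        # the Basic header carries the embedded client id/secret.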
+ token = self._download_json(
+ 'https://auth.redbullmediahouse.com/token', video_id,
+ 'Downloading token', data=urlencode_postdata({
+ 'grant_type': 'client_credentials',
+ }), headers={
+ 'Authorization': 'Basic SVgtMjJYNEhBNFdEM1cxMTpEdDRVSkFLd2ZOMG5IMjB1NGFBWTBmUFpDNlpoQ1EzNA==',
+ })
+ access_token = token['access_token']
+ token_type = token.get('token_type', 'Bearer')
+
+ video = self._download_json(
+ 'https://sparkle-api.liiift.io/api/v1/stv/channels/international/assets/%s' % video_id,
+ video_id, 'Downloading video JSON', headers={
+ 'Authorization': '%s %s' % (token_type, access_token),
+ })
+
+ formats = []
+ thumbnail = None
+ for resource in video['resources']:
+ if not isinstance(resource, dict):
+ continue
+ format_url = url_or_none(resource.get('url'))
+ if not format_url:
+ continue
+ extension = resource.get('extension')
+ type_ = resource.get('type')
+ if extension == 'jpg' or type_ == 'reference_keyframe':
+ thumbnail = format_url
+ continue
+ ext = determine_ext(format_url)
+ if type_ == 'dash' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ elif type_ == 'hls' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif extension == 'mp4' or ext == 'mp4':
+ formats.append({
+ 'url': format_url,
+ 'format_id': type_,
+ 'width': int_or_none(resource.get('width')),
+ 'height': int_or_none(resource.get('height')),
+ })
+ self._sort_formats(formats)
+
+ attrs = {}
+ for attribute in video['attributes']:
+ if not isinstance(attribute, dict):
+ continue
+ key = attribute.get('fieldKey')
+ value = attribute.get('fieldValue')
+ if not key or not value:
+ continue
+ attrs[key] = value
+
+ title = attrs.get('title_stv') or video_id
+ alt_title = attrs.get('title')
+ description = attrs.get('long_description') or attrs.get('short_description')
+ series = attrs.get('label')
+ season = attrs.get('season')
+ episode = attrs.get('chapter')
+ duration = float_or_none(attrs.get('duration'), scale=1000)
+ season_number = int_or_none(self._search_regex(
+ r'Season (\d+)', season or '', 'season number', default=None))
+ episode_number = int_or_none(self._search_regex(
+ r'Episode (\d+)', episode or '', 'episode number', default=None))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'alt_title': alt_title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': unified_timestamp(video.get('lastPublished')),
+ 'series': series,
+ 'season': season,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/sevenplus.py b/hypervideo_dl/extractor/sevenplus.py
new file mode 100644
index 0000000..240afc1
--- /dev/null
+++ b/hypervideo_dl/extractor/sevenplus.py
@@ -0,0 +1,94 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .brightcove import BrightcoveNewIE
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ try_get,
+ update_url_query,
+)
+
+
+class SevenPlusIE(BrightcoveNewIE):
+ IE_NAME = '7plus'
+ _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
+ _TESTS = [{
+ 'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003',
+ 'info_dict': {
+ 'id': 'MTYS7-003',
+ 'ext': 'mp4',
+ 'title': 'S7 E3 - Wind Surf',
+ 'description': 'md5:29c6a69f21accda7601278f81b46483d',
+ 'uploader_id': '5303576322001',
+ 'upload_date': '20171201',
+ 'timestamp': 1512106377,
+ 'series': 'Mighty Ships',
+ 'season_number': 7,
+ 'episode_number': 3,
+ 'episode': 'Wind Surf',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://7plus.com.au/UUUU?episode-id=AUMS43-001',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ path, episode_id = re.match(self._VALID_URL, url).groups()
+
+ try:
+ media = self._download_json(
+ 'https://videoservice.swm.digital/playback', episode_id, query={
+ 'appId': '7plus',
+ 'deviceType': 'web',
+ 'platformType': 'web',
+ 'accountId': 5303576322001,
+ 'referenceId': 'ref:' + episode_id,
+ 'deliveryId': 'csai',
+ 'videoType': 'vod',
+ })['media']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ raise ExtractorError(self._parse_json(
+ e.cause.read().decode(), episode_id)[0]['error_code'], expected=True)
+ raise
+
+        for source in media.get('sources') or []:
+ src = source.get('src')
+ if not src:
+ continue
+ source['src'] = update_url_query(src, {'rule': ''})
+
+ info = self._parse_brightcove_metadata(media, episode_id)
+
+ content = self._download_json(
+ 'https://component-cdn.swm.digital/content/' + path,
+ episode_id, headers={
+ 'market-id': 4,
+ }, fatal=False) or {}
+        for item in content.get('items') or []:
+ if item.get('componentData', {}).get('componentType') == 'infoPanel':
+ for src_key, dst_key in [('title', 'title'), ('shortSynopsis', 'description')]:
+ value = item.get(src_key)
+ if value:
+ info[dst_key] = value
+ info['series'] = try_get(
+ item, lambda x: x['seriesLogo']['name'], compat_str)
+ mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title'])
+ if mobj:
+ info.update({
+ 'season_number': int(mobj.group(1)),
+ 'episode_number': int(mobj.group(2)),
+ 'episode': mobj.group(3),
+ })
+
+ return info
diff --git a/hypervideo_dl/extractor/sexu.py b/hypervideo_dl/extractor/sexu.py
new file mode 100644
index 0000000..3df5152
--- /dev/null
+++ b/hypervideo_dl/extractor/sexu.py
@@ -0,0 +1,63 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class SexuIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://sexu.com/961791/',
+ 'md5': 'ff615aca9691053c94f8f10d96cd7884',
+ 'info_dict': {
+ 'id': '961791',
+ 'ext': 'mp4',
+ 'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
+ 'description': 'md5:2b75327061310a3afb3fbd7d09e2e403',
+ 'categories': list, # NSFW
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ jwvideo = self._parse_json(
+ self._search_regex(r'\.setup\(\s*({.+?})\s*\);', webpage, 'jwvideo'),
+ video_id)
+
+ sources = jwvideo['sources']
+
+ formats = [{
+ 'url': source['file'].replace('\\', ''),
+ 'format_id': source.get('label'),
+        'height': int_or_none(self._search_regex(
+            r'^(\d+)[pP]', source.get('label', ''), 'height',
+            default=None)),
+ } for source in sources if source.get('file')]
+ self._sort_formats(formats)
+
+ title = self._html_search_regex(
+ r'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage, 'title')
+
+ description = self._html_search_meta(
+ 'description', webpage, 'description')
+
+ thumbnail = jwvideo.get('image')
+
+ categories_str = self._html_search_meta(
+ 'keywords', webpage, 'categories')
+ categories = (
+ None if categories_str is None
+ else categories_str.split(','))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'categories': categories,
+ 'formats': formats,
+ 'age_limit': 18,
+ }
diff --git a/hypervideo_dl/extractor/seznamzpravy.py b/hypervideo_dl/extractor/seznamzpravy.py
new file mode 100644
index 0000000..7a1c7e3
--- /dev/null
+++ b/hypervideo_dl/extractor/seznamzpravy.py
@@ -0,0 +1,169 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_str,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ urljoin,
+ int_or_none,
+ parse_codecs,
+ try_get,
+)
+
+
+def _raw_id(src_url):
+ return compat_urllib_parse_urlparse(src_url).path.split('/')[-1]
+
+
+class SeznamZpravyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?seznamzpravy\.cz/iframe/player\?.*\bsrc='
+ _TESTS = [{
+ 'url': 'https://www.seznamzpravy.cz/iframe/player?duration=241&serviceSlug=zpravy&src=https%3A%2F%2Fv39-a.sdn.szn.cz%2Fv_39%2Fvmd%2F5999c902ea707c67d8e267a9%3Ffl%3Dmdk%2C432f65a0%7C&itemType=video&autoPlay=false&title=Sv%C4%9Bt%20bez%20obalu%3A%20%C4%8Ce%C5%A1t%C3%AD%20voj%C3%A1ci%20na%20mis%C3%ADch%20(kr%C3%A1tk%C3%A1%20verze)&series=Sv%C4%9Bt%20bez%20obalu&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_F_I%2FR5puJ.jpeg%3Ffl%3Dcro%2C0%2C0%2C1920%2C1080%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=1920&height=1080&cutFrom=0&cutTo=0&splVersion=VOD&contentId=170889&contextId=35990&showAdvert=true&collocation=&autoplayPossible=true&embed=&isVideoTooShortForPreroll=false&isVideoTooLongForPostroll=true&videoCommentOpKey=&videoCommentId=&version=4.0.76&dotService=zpravy&gemiusPrismIdentifier=bVc1ZIb_Qax4W2v5xOPGpMeCP31kFfrTzj0SqPTLh_b.Z7&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy',
+ 'info_dict': {
+ 'id': '170889',
+ 'ext': 'mp4',
+ 'title': 'Svět bez obalu: Čeští vojáci na misích (krátká verze)',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'duration': 241,
+ 'series': 'Svět bez obalu',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # with Location key
+ 'url': 'https://www.seznamzpravy.cz/iframe/player?duration=null&serviceSlug=zpravy&src=https%3A%2F%2Flive-a.sdn.szn.cz%2Fv_39%2F59e468fe454f8472a96af9fa%3Ffl%3Dmdk%2C5c1e2840%7C&itemType=livevod&autoPlay=false&title=P%C5%99edseda%20KDU-%C4%8CSL%20Pavel%20B%C4%9Blobr%C3%A1dek%20ve%20volebn%C3%AD%20V%C3%BDzv%C4%9B%20Seznamu&series=V%C3%BDzva&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_G_J%2FjTBCs.jpeg%3Ffl%3Dcro%2C0%2C0%2C1280%2C720%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=16&height=9&cutFrom=0&cutTo=0&splVersion=VOD&contentId=185688&contextId=38489&showAdvert=true&collocation=&hideFullScreen=false&hideSubtitles=false&embed=&isVideoTooShortForPreroll=false&isVideoTooShortForPreroll2=false&isVideoTooLongForPostroll=false&fakePostrollZoneID=seznam.clanky.zpravy.preroll&fakePrerollZoneID=seznam.clanky.zpravy.preroll&videoCommentId=&trim=default_16x9&noPrerollVideoLength=30&noPreroll2VideoLength=undefined&noMidrollVideoLength=0&noPostrollVideoLength=999999&autoplayPossible=true&version=5.0.41&dotService=zpravy&gemiusPrismIdentifier=zD3g7byfW5ekpXmxTVLaq5Srjw5i4hsYo0HY1aBwIe..27&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy%2Fvyzva&zoneIdPostroll=seznam.pack.videospot&skipOffsetPostroll=5&sectionPrefixPostroll=%2Fzpravy%2Fvyzva&regression=false',
+ 'info_dict': {
+ 'id': '185688',
+ 'ext': 'mp4',
+ 'title': 'Předseda KDU-ČSL Pavel Bělobrádek ve volební Výzvě Seznamu',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'series': 'Výzva',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url') for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1',
+ webpage)]
+
+ def _extract_sdn_formats(self, sdn_url, video_id):
+ sdn_data = self._download_json(sdn_url, video_id)
+
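+ # Some SDN responses are only a redirect stub: when a 'Location' key
+ # is present, fetch the referenced JSON once more to obtain the actual
+ # stream description.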
+ if sdn_data.get('Location'):
+ sdn_url = sdn_data['Location']
+ sdn_data = self._download_json(sdn_url, video_id)
+
+ formats = []
+ mp4_formats = try_get(sdn_data, lambda x: x['data']['mp4'], dict) or {}
+ for format_id, format_data in mp4_formats.items():
+ relative_url = format_data.get('url')
+ if not relative_url:
+ continue
+
+ try:
+ width, height = format_data.get('resolution')
+ except (TypeError, ValueError):
+ width, height = None, None
+
+ f = {
+ 'url': urljoin(sdn_url, relative_url),
+ 'format_id': 'http-%s' % format_id,
+ 'tbr': int_or_none(format_data.get('bandwidth'), scale=1000),
+ 'width': int_or_none(width),
+ 'height': int_or_none(height),
+ }
+ f.update(parse_codecs(format_data.get('codec')))
+ formats.append(f)
+
+ pls = sdn_data.get('pls', {})
+
+ def get_url(format_id):
+ return try_get(pls, lambda x: x[format_id]['url'], compat_str)
+
+ dash_rel_url = get_url('dash')
+ if dash_rel_url:
+ formats.extend(self._extract_mpd_formats(
+ urljoin(sdn_url, dash_rel_url), video_id, mpd_id='dash',
+ fatal=False))
+
+ hls_rel_url = get_url('hls')
+ if hls_rel_url:
+ formats.extend(self._extract_m3u8_formats(
+ urljoin(sdn_url, hls_rel_url), video_id, ext='mp4',
+ m3u8_id='hls', fatal=False))
+
+ self._sort_formats(formats)
+ return formats
+
+ def _real_extract(self, url):
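+ # The iframe player URL carries all metadata (src, title, poster,
+ # duration, ...) in its query string.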
+ params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+
+ src = params['src'][0]
+ title = params['title'][0]
+ video_id = params.get('contentId', [_raw_id(src)])[0]
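+ # the 'spl2,2,VOD' suffix requests the SDN playlist JSON (apparently
+ # the VOD variant of the stream)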
+ formats = self._extract_sdn_formats(src + 'spl2,2,VOD', video_id)
+
+ duration = int_or_none(params.get('duration', [None])[0])
+ series = params.get('series', [None])[0]
+ thumbnail = params.get('poster', [None])[0]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'series': series,
+ 'formats': formats,
+ }
+
+
+class SeznamZpravyArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:seznam\.cz/zpravy|seznamzpravy\.cz)/clanek/(?:[^/?#&]+)-(?P<id>\d+)'
+ _API_URL = 'https://apizpravy.seznam.cz/'
+
+ _TESTS = [{
+ # two videos on one page, with SDN URL
+ 'url': 'https://www.seznamzpravy.cz/clanek/jejich-svet-na-nas-utoci-je-lepsi-branit-se-na-jejich-pisecku-rika-reziser-a-major-v-zaloze-marhoul-35990',
+ 'info_dict': {
+ 'id': '35990',
+ 'title': 'md5:6011c877a36905f28f271fcd8dcdb0f2',
+ 'description': 'md5:933f7b06fa337a814ba199d3596d27ba',
+ },
+ 'playlist_count': 2,
+ }, {
+ # video with live stream URL
+ 'url': 'https://www.seznam.cz/zpravy/clanek/znovu-do-vlady-s-ano-pavel-belobradek-ve-volebnim-specialu-seznamu-38489',
+ 'info_dict': {
+ 'id': '38489',
+ 'title': 'md5:8fa1afdc36fd378cf0eba2b74c5aca60',
+ 'description': 'md5:428e7926a1a81986ec7eb23078004fb4',
+ },
+ 'playlist_count': 1,
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, article_id)
+
+ info = self._search_json_ld(webpage, article_id, default={})
+
+ title = info.get('title') or self._og_search_title(webpage, fatal=False)
+ description = info.get('description') or self._og_search_description(webpage)
+
+ return self.playlist_result([
+ self.url_result(entry_url, ie=SeznamZpravyIE.ie_key())
+ for entry_url in SeznamZpravyIE._extract_urls(webpage)],
+ article_id, title, description)
diff --git a/hypervideo_dl/extractor/shahid.py b/hypervideo_dl/extractor/shahid.py
new file mode 100644
index 0000000..88b938e
--- /dev/null
+++ b/hypervideo_dl/extractor/shahid.py
@@ -0,0 +1,225 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import math
+import re
+
+from .aws import AWSIE
+from ..compat import compat_HTTPError
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ InAdvancePagedList,
+ int_or_none,
+ parse_iso8601,
+ str_or_none,
+ urlencode_postdata,
+)
+
+
+class ShahidBaseIE(AWSIE):
+ _AWS_PROXY_HOST = 'api2.shahid.net'
+ _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh'
+ _VALID_URL_BASE = r'https?://shahid\.mbc\.net/[a-z]{2}/'
+
+ def _handle_error(self, e):
+ fail_data = self._parse_json(
+ e.cause.read().decode('utf-8'), None, fatal=False)
+ if fail_data:
+ faults = fail_data.get('faults', [])
+ faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')])
+ if faults_message:
+ raise ExtractorError(faults_message, expected=True)
+
+ def _call_api(self, path, video_id, request=None):
+ query = {}
+ if request:
+ query['request'] = json.dumps(request)
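+ # requests are signed with AWS Signature Version 4 by AWSIE using
+ # the hard-coded credentials below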
+ try:
+ return self._aws_execute_api({
+ 'uri': '/proxy/v2/' + path,
+ 'access_key': 'AKIAI6X4TYCIXM2B7MUQ',
+ 'secret_key': '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',
+ }, video_id, query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ self._handle_error(e)
+ raise
+
+
+class ShahidIE(ShahidBaseIE):
+ _NETRC_MACHINE = 'shahid'
+ _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924',
+ 'info_dict': {
+ 'id': '816924',
+ 'ext': 'mp4',
+ 'title': 'متحف الدحيح الموسم 1 كليب 1',
+ 'timestamp': 1602806400,
+ 'upload_date': '20201016',
+ 'description': 'برومو',
+ 'duration': 22,
+ 'categories': ['كوميديا'],
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://shahid.mbc.net/ar/movies/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9/movie-151746',
+ 'only_matching': True
+ }, {
+ # shahid plus subscriber only
+ 'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511',
+ 'only_matching': True
+ }, {
+ 'url': 'https://shahid.mbc.net/en/shows/Ramez-Fi-Al-Shallal-season-1-episode-1/episode-359319',
+ 'only_matching': True
+ }]
+
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
+ try:
+ user_data = self._download_json(
+ 'https://shahid.mbc.net/wd/service/users/login',
+ None, 'Logging in', data=json.dumps({
+ 'email': email,
+ 'password': password,
+ 'basic': 'false',
+ }).encode('utf-8'), headers={
+ 'Content-Type': 'application/json; charset=UTF-8',
+ })['user']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ self._handle_error(e)
+ raise
+
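+ # registering the session via populateContext appears to be required
+ # before subscriber-only API calls succeed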
+ self._download_webpage(
+ 'https://shahid.mbc.net/populateContext',
+ None, 'Populate Context', data=urlencode_postdata({
+ 'firstName': user_data['firstName'],
+ 'lastName': user_data['lastName'],
+ 'userName': user_data['email'],
+ 'csg_user_name': user_data['email'],
+ 'subscriberId': user_data['id'],
+ 'sessionId': user_data['sessionId'],
+ }))
+
+ def _real_extract(self, url):
+ page_type, video_id = re.match(self._VALID_URL, url).groups()
+ if page_type == 'clip':
+ page_type = 'episode'
+
+ playout = self._call_api(
+ 'playout/new/url/' + video_id, video_id)['playout']
+
+ if playout.get('drm'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
+ formats = self._extract_m3u8_formats(re.sub(
+ # https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
+ r'aws\.manifestfilter=[\w:;,-]+&?',
+ '', playout['url']), video_id, 'mp4')
+ self._sort_formats(formats)
+
+ # video = self._call_api(
+ # 'product/id', video_id, {
+ # 'id': video_id,
+ # 'productType': 'ASSET',
+ # 'productSubType': page_type.upper()
+ # })['productModel']
+
+ response = self._download_json(
+ 'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),
+ video_id, 'Downloading video JSON', query={
+ 'apiKey': 'sh@hid0nlin3',
+ 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
+ })
+ data = response.get('data', {})
+ error = data.get('error')
+ if error:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
+ expected=True)
+
+ video = data[page_type]
+ title = video['title']
+ categories = [
+ category['name']
+ for category in video.get('genres', []) if 'name' in category]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': video.get('thumbnailUrl'),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': parse_iso8601(video.get('referenceDate')),
+ 'categories': categories,
+ 'series': video.get('showTitle') or video.get('showName'),
+ 'season': video.get('seasonTitle'),
+ 'season_number': int_or_none(video.get('seasonNumber')),
+ 'season_id': str_or_none(video.get('seasonId')),
+ 'episode_number': int_or_none(video.get('number')),
+ 'episode_id': video_id,
+ 'formats': formats,
+ }
+
+
+class ShahidShowIE(ShahidBaseIE):
+ _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187',
+ 'info_dict': {
+ 'id': '79187',
+ 'title': 'رامز قرش البحر',
+ 'description': 'md5:c88fa7e0f02b0abd39d417aee0d046ff',
+ },
+ 'playlist_mincount': 32,
+ }, {
+ 'url': 'https://shahid.mbc.net/ar/series/How-to-live-Longer-(The-Big-Think)/series-291861',
+ 'only_matching': True
+ }]
+ _PAGE_SIZE = 30
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+
+ product = self._call_api(
+ 'playableAsset', show_id, {'showId': show_id})['productModel']
+ playlist = product['playlist']
+ playlist_id = playlist['id']
+ show = product.get('show', {})
+
+ def page_func(page_num):
+ playlist = self._call_api(
+ 'product/playlist', show_id, {
+ 'playListId': playlist_id,
+ 'pageNumber': page_num,
+ 'pageSize': self._PAGE_SIZE,
+ 'sorts': [{
+ 'order': 'DESC',
+ 'type': 'SORTDATE'
+ }],
+ })
+ for product in playlist.get('productList', {}).get('products', []):
+ product_url = (product.get('productUrl') or {}).get('url')
+ if not product_url:
+ continue
+ yield self.url_result(
+ product_url, 'Shahid',
+ str_or_none(product.get('id')),
+ product.get('title'))
+
+ entries = InAdvancePagedList(
+ page_func,
+ math.ceil(playlist['count'] / self._PAGE_SIZE),
+ self._PAGE_SIZE)
+
+ return self.playlist_result(
+ entries, show_id, show.get('title'), show.get('description'))
diff --git a/hypervideo_dl/extractor/shared.py b/hypervideo_dl/extractor/shared.py
new file mode 100644
index 0000000..93ab2a1
--- /dev/null
+++ b/hypervideo_dl/extractor/shared.py
@@ -0,0 +1,141 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote_plus,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ KNOWN_EXTENSIONS,
+ parse_filesize,
+ rot47,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class SharedBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ if self._FILE_NOT_FOUND in webpage:
+ raise ExtractorError(
+ 'Video %s does not exist' % video_id, expected=True)
+
+ video_url = self._extract_video_url(webpage, video_id, url)
+
+ title = self._extract_title(webpage)
+ filesize = int_or_none(self._extract_filesize(webpage))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'filesize': filesize,
+ 'title': title,
+ }
+
+ def _extract_title(self, webpage):
+ return compat_b64decode(self._html_search_meta(
+ 'full:title', webpage, 'title')).decode('utf-8')
+
+ def _extract_filesize(self, webpage):
+ return self._html_search_meta(
+ 'full:size', webpage, 'file size', fatal=False)
+
+
+class SharedIE(SharedBaseIE):
+ IE_DESC = 'shared.sx'
+ _VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
+ _FILE_NOT_FOUND = '>File does not exist<'
+
+ _TEST = {
+ 'url': 'http://shared.sx/0060718775',
+ 'md5': '106fefed92a8a2adb8c98e6a0652f49b',
+ 'info_dict': {
+ 'id': '0060718775',
+ 'ext': 'mp4',
+ 'title': 'Bmp4',
+ 'filesize': 1720110,
+ },
+ }
+
+ def _extract_video_url(self, webpage, video_id, url):
+ download_form = self._hidden_inputs(webpage)
+
+ video_page = self._download_webpage(
+ url, video_id, 'Downloading video page',
+ data=urlencode_postdata(download_form),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': url,
+ })
+
+ video_url = self._html_search_regex(
+ r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ video_page, 'video URL', group='url')
+
+ return video_url
+
+
+class VivoIE(SharedBaseIE):
+ IE_DESC = 'vivo.sx'
+ _VALID_URL = r'https?://vivo\.s[xt]/(?P<id>[\da-z]{10})'
+ _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'
+
+ _TESTS = [{
+ 'url': 'http://vivo.sx/d7ddda0e78',
+ 'md5': '15b3af41be0b4fe01f4df075c2678b2c',
+ 'info_dict': {
+ 'id': 'd7ddda0e78',
+ 'ext': 'mp4',
+ 'title': 'Chicken',
+ 'filesize': 515659,
+ },
+ }, {
+ 'url': 'http://vivo.st/d7ddda0e78',
+ 'only_matching': True,
+ }]
+
+ def _extract_title(self, webpage):
+ title = self._html_search_regex(
+ r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
+ 'title', default=None, group='title')
+ if title:
+ ext = determine_ext(title)
+ if ext.lower() in KNOWN_EXTENSIONS:
+ title = title.rpartition('.' + ext)[0]
+ return title
+ return self._og_search_title(webpage)
+
+ def _extract_filesize(self, webpage):
+ return parse_filesize(self._search_regex(
+ r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
+ webpage, 'filesize', fatal=False))
+
+ def _extract_video_url(self, webpage, video_id, url):
+ def decode_url_old(encoded_url):
+ return compat_b64decode(encoded_url).decode('utf-8')
+
+ stream_url = self._search_regex(
+ r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'stream url', default=None, group='url')
+ if stream_url:
+ stream_url = url_or_none(decode_url_old(stream_url))
+ if stream_url:
+ return stream_url
+
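+ # newer Vivo pages obfuscate the stream URL by percent-encoding it
+ # and then applying ROT47; undo both to recover the plain URL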
+ def decode_url(encoded_url):
+ return rot47(compat_urllib_parse_unquote_plus(encoded_url))
+
+ return decode_url(self._parse_json(
+ self._search_regex(
+ r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage,
+ 'stream'),
+ video_id, transform_source=js_to_json)['source'])
diff --git a/hypervideo_dl/extractor/showroomlive.py b/hypervideo_dl/extractor/showroomlive.py
new file mode 100644
index 0000000..efd9d56
--- /dev/null
+++ b/hypervideo_dl/extractor/showroomlive.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ urljoin,
+)
+
+
+class ShowRoomLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?!onlive|timetable|event|campaign|news|ranking|room)(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.showroom-live.com/48_Nana_Okada',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ broadcaster_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, broadcaster_id)
+
+ room_id = self._search_regex(
+ (r'SrGlobal\.roomId\s*=\s*(\d+)',
+ r'(?:profile|room)\?room_id\=(\d+)'), webpage, 'room_id')
+
+ room = self._download_json(
+ urljoin(url, '/api/room/profile?room_id=%s' % room_id),
+ broadcaster_id)
+
+ is_live = room.get('is_onlive')
+ if is_live is not True:
+ raise ExtractorError('%s is offline' % broadcaster_id, expected=True)
+
+ uploader = room.get('performer_name') or broadcaster_id
+ title = room.get('room_name') or room.get('main_name') or uploader
+
+ streaming_url_list = self._download_json(
+ urljoin(url, '/api/live/streaming_url?room_id=%s' % room_id),
+ broadcaster_id)['streaming_url_list']
+
+ formats = []
+ for stream in streaming_url_list:
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ stream_type = stream.get('type')
+ if stream_type == 'hls':
+ m3u8_formats = self._extract_m3u8_formats(
+ stream_url, broadcaster_id, ext='mp4', m3u8_id='hls',
+ live=True)
+ for f in m3u8_formats:
+ f['quality'] = int_or_none(stream.get('quality', 100))
+ formats.extend(m3u8_formats)
+ elif stream_type == 'rtmp':
+ stream_name = stream.get('stream_name')
+ if not stream_name:
+ continue
+ formats.append({
+ 'url': stream_url,
+ 'play_path': stream_name,
+ 'page_url': url,
+ 'player_url': 'https://www.showroom-live.com/assets/swf/v3/ShowRoomLive.swf',
+ 'rtmp_live': True,
+ 'ext': 'flv',
+ 'format_id': 'rtmp',
+ 'format_note': stream.get('label'),
+ 'quality': int_or_none(stream.get('quality', 100)),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': compat_str(room.get('live_id') or broadcaster_id),
+ 'title': self._live_title(title),
+ 'description': room.get('description'),
+ 'timestamp': int_or_none(room.get('current_live_started_at')),
+ 'uploader': uploader,
+ 'uploader_id': broadcaster_id,
+ 'view_count': int_or_none(room.get('view_num')),
+ 'formats': formats,
+ 'is_live': True,
+ }
diff --git a/hypervideo_dl/extractor/simplecast.py b/hypervideo_dl/extractor/simplecast.py
new file mode 100644
index 0000000..2d0b3c0
--- /dev/null
+++ b/hypervideo_dl/extractor/simplecast.py
@@ -0,0 +1,160 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_podcast_url,
+ int_or_none,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class SimplecastBaseIE(InfoExtractor):
+ _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
+ _API_BASE = 'https://api.simplecast.com/'
+
+ def _call_api(self, path_tmpl, video_id):
+ return self._download_json(
+ self._API_BASE + path_tmpl % video_id, video_id)
+
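+ # the search endpoints resolve a public page URL to its API resource
+ # via a form-encoded POST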
+ def _call_search_api(self, resource, resource_id, resource_url):
+ return self._download_json(
+ 'https://api.simplecast.com/%ss/search' % resource, resource_id,
+ data=urlencode_postdata({'url': resource_url}))
+
+ def _parse_episode(self, episode):
+ episode_id = episode['id']
+ title = episode['title'].strip()
+ audio_file = episode.get('audio_file') or {}
+ audio_file_url = audio_file.get('url') or episode.get('audio_file_url') or episode['enclosure_url']
+
+ season = episode.get('season') or {}
+ season_href = season.get('href')
+ season_id = None
+ if season_href:
+ season_id = self._search_regex(
+ r'https?://api\.simplecast\.com/seasons/(%s)' % self._UUID_REGEX,
+ season_href, 'season id', default=None)
+
+ webpage_url = episode.get('episode_url')
+ channel_url = None
+ if webpage_url:
+ channel_url = self._search_regex(
+ r'(https?://[^/]+\.simplecast\.com)',
+ webpage_url, 'channel url', default=None)
+
+ return {
+ 'id': episode_id,
+ 'display_id': episode.get('slug'),
+ 'title': title,
+ 'url': clean_podcast_url(audio_file_url),
+ 'webpage_url': webpage_url,
+ 'channel_url': channel_url,
+ 'series': try_get(episode, lambda x: x['podcast']['title']),
+ 'season_number': int_or_none(season.get('number')),
+ 'season_id': season_id,
+ 'thumbnail': episode.get('image_url'),
+ 'episode_id': episode_id,
+ 'episode_number': int_or_none(episode.get('number')),
+ 'description': strip_or_none(episode.get('description')),
+ 'timestamp': parse_iso8601(episode.get('published_at')),
+ 'duration': int_or_none(episode.get('duration')),
+ 'filesize': int_or_none(audio_file.get('size') or episode.get('audio_file_size')),
+ }
+
+
+class SimplecastIE(SimplecastBaseIE):
+ IE_NAME = 'simplecast'
+ _VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX
+ _COMMON_TEST_INFO = {
+ 'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
+ 'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
+ 'ext': 'mp3',
+ 'title': 'Errant Signal - Chris Franklin & New Wave Video Essays',
+ 'episode_number': 1,
+ 'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
+ 'description': 'md5:34752789d3d2702e2d2c975fbd14f357',
+ 'season_number': 1,
+ 'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13',
+ 'series': 'The RE:BIND.io Podcast',
+ 'duration': 5343,
+ 'timestamp': 1580979475,
+ 'upload_date': '20200206',
+ 'webpage_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
+ 'channel_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com$',
+ }
+ _TESTS = [{
+ 'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876',
+ 'md5': '8c93be7be54251bf29ee97464eabe61c',
+ 'info_dict': _COMMON_TEST_INFO,
+ }, {
+ 'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'''(?x)<iframe[^>]+src=["\']
+ (
+ https?://(?:embed\.simplecast\.com/[0-9a-f]{8}|
+ player\.simplecast\.com/%s
+ ))''' % SimplecastBaseIE._UUID_REGEX, webpage)
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ episode = self._call_api('episodes/%s', episode_id)
+ return self._parse_episode(episode)
+
+
+class SimplecastEpisodeIE(SimplecastBaseIE):
+ IE_NAME = 'simplecast:episode'
+ _VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
+ 'md5': '8c93be7be54251bf29ee97464eabe61c',
+ 'info_dict': SimplecastIE._COMMON_TEST_INFO,
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ episode = self._call_search_api(
+ 'episode', mobj.group('id'), mobj.group(0))
+ return self._parse_episode(episode)
+
+
+class SimplecastPodcastIE(SimplecastBaseIE):
+ IE_NAME = 'simplecast:podcast'
+ _VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://the-re-bind-io-podcast.simplecast.com',
+ 'playlist_mincount': 33,
+ 'info_dict': {
+ 'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c',
+ 'title': 'The RE:BIND.io Podcast',
+ },
+ }, {
+ 'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ subdomain = self._match_id(url)
+ site = self._call_search_api('site', subdomain, url)
+ podcast = site['podcast']
+ podcast_id = podcast['id']
+ podcast_title = podcast.get('title')
+
+ def entries():
+ episodes = self._call_api('podcasts/%s/episodes', podcast_id)
+ for episode in (episodes.get('collection') or []):
+ info = self._parse_episode(episode)
+ info['series'] = podcast_title
+ yield info
+
+ return self.playlist_result(entries(), podcast_id, podcast_title)
diff --git a/hypervideo_dl/extractor/sina.py b/hypervideo_dl/extractor/sina.py
new file mode 100644
index 0000000..07b766b
--- /dev/null
+++ b/hypervideo_dl/extractor/sina.py
@@ -0,0 +1,115 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ HEADRequest,
+ ExtractorError,
+ int_or_none,
+ update_url_query,
+ qualities,
+ get_element_by_attribute,
+ clean_html,
+)
+
+
+class SinaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/
+ (?:
+ (?:view/|.*\#)(?P<video_id>\d+)|
+ .+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)|
+ # This is used by external sites like Weibo
+ api/sinawebApi/outplay.php/(?P<token>.+?)\.swf
+ )
+ '''
+
+ _TESTS = [
+ {
+ 'url': 'http://video.sina.com.cn/news/spj/topvideoes20160504/?opsubject_id=top1#250576622',
+ 'md5': 'd38433e2fc886007729735650ae4b3e9',
+ 'info_dict': {
+ 'id': '250576622',
+ 'ext': 'mp4',
+ 'title': '现场:克鲁兹宣布退选 特朗普将稳获提名',
+ }
+ },
+ {
+ 'url': 'http://video.sina.com.cn/v/b/101314253-1290078633.html',
+ 'info_dict': {
+ 'id': '101314253',
+ 'ext': 'flv',
+ 'title': '军方提高对朝情报监视级别',
+ },
+ 'skip': 'the page does not exist or has been deleted',
+ },
+ {
+ 'url': 'http://video.sina.com.cn/view/250587748.html',
+ 'md5': '3d1807a25c775092aab3bc157fff49b4',
+ 'info_dict': {
+ 'id': '250587748',
+ 'ext': 'mp4',
+ 'title': '瞬间泪目:8年前汶川地震珍贵视频首曝光',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ video_id = mobj.group('video_id')
+ if not video_id:
+ if mobj.group('token') is not None:
+ # The video id is in the redirected url
+ self.to_screen('Getting video id')
+ request = HEADRequest(url)
+ _, urlh = self._download_webpage_handle(request, 'NA', False)
+ return self._real_extract(urlh.geturl())
+ else:
+ pseudo_id = mobj.group('pseudo_id')
+ webpage = self._download_webpage(url, pseudo_id)
+ error = get_element_by_attribute('class', 'errtitle', webpage)
+ if error:
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, clean_html(error)), expected=True)
+ video_id = self._search_regex(
+ r"video_id\s*:\s*'(\d+)'", webpage, 'video id')
+
+ video_data = self._download_json(
+ 'http://s.video.sina.com.cn/video/h5play',
+ video_id, query={'video_id': video_id})
+ if video_data['code'] != 1:
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, video_data['message']), expected=True)
+ else:
+ video_data = video_data['data']
+ title = video_data['title']
+ description = video_data.get('description')
+ if description:
+ description = description.strip()
+
+ preference = qualities(['cif', 'sd', 'hd', 'fhd', 'ffd'])
+ formats = []
+ for quality_id, quality in video_data.get('videos', {}).get('mp4', {}).items():
+ file_api = quality.get('file_api')
+ file_id = quality.get('file_id')
+ if not file_api or not file_id:
+ continue
+ formats.append({
+ 'format_id': quality_id,
+ 'url': update_url_query(file_api, {'vid': file_id}),
+ 'preference': preference(quality_id),
+ 'ext': 'mp4',
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': video_data.get('image'),
+ 'duration': int_or_none(video_data.get('length')),
+ 'timestamp': int_or_none(video_data.get('create_time')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/sixplay.py b/hypervideo_dl/extractor/sixplay.py
new file mode 100644
index 0000000..7ec66ec
--- /dev/null
+++ b/hypervideo_dl/extractor/sixplay.py
@@ -0,0 +1,129 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_str,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ try_get,
+ qualities,
+)
+
+
+class SixPlayIE(InfoExtractor):
+ IE_NAME = '6play'
+ _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr|rtlmost\.hu)/.+?-c_)(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
+ 'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
+ 'info_dict': {
+ 'id': '12041051',
+ 'ext': 'mp4',
+ 'title': 'Le but qui a marqué l\'histoire du football français !',
+ 'description': 'md5:b59e7e841d646ef1eb42a7868eb6a851',
+ },
+ }, {
+ 'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.rtlmost.hu/megtorve-p_14167/megtorve-6-resz-c_12397787',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, video_id = re.search(self._VALID_URL, url).groups()
+ service, consumer_name = {
+ '6play.fr': ('6play', 'm6web'),
+ 'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
+ 'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'),
+ 'rtlmost.hu': ('rtlhu_rtl_most', 'rtlhu'),
+ }.get(domain, ('6play', 'm6web'))
+
+ data = self._download_json(
+ 'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/%s/videos/clip_%s' % (service, video_id),
+ video_id, headers={
+ 'x-customer-name': consumer_name
+ }, query={
+ 'csa': 5,
+ 'with': 'clips',
+ })
+
+ clip_data = data['clips'][0]
+ title = clip_data['title']
+
+ urls = []
+ quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
+ formats = []
+ subtitles = {}
+ assets = clip_data.get('assets') or []
+ for asset in assets:
+ asset_url = asset.get('full_physical_path')
+ protocol = asset.get('protocol')
+ if not asset_url or ((protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264') and not ('_drmnp.ism/' in asset_url or '_unpnp.ism/' in asset_url)) or asset_url in urls:
+ continue
+ urls.append(asset_url)
+ container = asset.get('video_container')
+ ext = determine_ext(asset_url)
+ if protocol == 'http_subtitle' or ext == 'vtt':
+ subtitles.setdefault('fr', []).append({'url': asset_url})
+ continue
+ if container == 'm3u8' or ext == 'm3u8':
+ if protocol == 'usp':
+ if compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
+ urlh = self._request_webpage(
+ asset_url, video_id, fatal=False,
+ headers=self.geo_verification_headers())
+ if not urlh:
+ continue
+ asset_url = urlh.geturl()
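+ # '_drmnp.ism/' manifests are DRM protected; request what appears
+ # to be the unprotected '_unpnp.ism/' variant instead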
+ asset_url = asset_url.replace('_drmnp.ism/', '_unpnp.ism/')
+ for i in range(3, 0, -1):
+ asset_url = asset_url.replace('_sd1/', '_sd%d/' % i)
+ m3u8_formats = self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ formats.extend(m3u8_formats)
+ formats.extend(self._extract_mpd_formats(
+ asset_url.replace('.m3u8', '.mpd'),
+ video_id, mpd_id='dash', fatal=False))
+ if m3u8_formats:
+ break
+ else:
+ formats.extend(self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif container == 'mp4' or ext == 'mp4':
+ quality = asset.get('video_quality')
+ formats.append({
+ 'url': asset_url,
+ 'format_id': quality,
+ 'quality': quality_key(quality),
+ 'ext': ext,
+ })
+ self._sort_formats(formats)
+
+ def get(getter):
+ for src in (data, clip_data):
+ v = try_get(src, getter, compat_str)
+ if v:
+ return v
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': get(lambda x: x['description']),
+ 'duration': int_or_none(clip_data.get('duration')),
+ 'series': get(lambda x: x['program']['title']),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/sky.py b/hypervideo_dl/extractor/sky.py
new file mode 100644
index 0000000..ff2c977
--- /dev/null
+++ b/hypervideo_dl/extractor/sky.py
@@ -0,0 +1,131 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ smuggle_url,
+ strip_or_none,
+ urljoin,
+)
+
+
+class SkyBaseIE(InfoExtractor):
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
+ _SDC_EL_REGEX = r'(?s)(<div[^>]+data-(?:component-name|fn)="sdc-(?:articl|sit)e-video"[^>]*>)'
+
+ def _process_ooyala_element(self, webpage, sdc_el, url):
+ sdc = extract_attributes(sdc_el)
+ provider = sdc.get('data-provider')
+ if provider == 'ooyala':
+ video_id = sdc['data-sdc-video-id']
+ video_url = 'ooyala:%s' % video_id
+ ie_key = 'Ooyala'
+ ooyala_el = self._search_regex(
+ r'(<div[^>]+class="[^"]*\bsdc-article-video__media-ooyala\b[^"]*"[^>]+data-video-id="%s"[^>]*>)' % video_id,
+ webpage, 'video data', fatal=False)
+ if ooyala_el:
+ ooyala_attrs = extract_attributes(ooyala_el) or {}
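+ # token-protected Ooyala embeds advertise a fetch URL in
+ # data-token-fetch-options; download the embed token and smuggle
+ # it to the Ooyala extractor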
+ if ooyala_attrs.get('data-token-required') == 'true':
+ token_fetch_url = (self._parse_json(ooyala_attrs.get(
+ 'data-token-fetch-options', '{}'),
+ video_id, fatal=False) or {}).get('url')
+ if token_fetch_url:
+ embed_token = self._download_json(urljoin(
+ url, token_fetch_url), video_id, fatal=False)
+ if embed_token:
+ video_url = smuggle_url(
+ video_url, {'embed_token': embed_token})
+ elif provider == 'brightcove':
+ video_id = sdc['data-video-id']
+ account_id = sdc.get('data-account-id') or '6058004172001'
+ player_id = sdc.get('data-player-id') or 'RC9PQUaJ6'
+ video_url = self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id)
+ ie_key = 'BrightcoveNew'
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': video_url,
+ 'ie_key': ie_key,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ info = self._process_ooyala_element(webpage, self._search_regex(
+ self._SDC_EL_REGEX, webpage, 'sdc element'), url)
+ info.update({
+ 'title': self._og_search_title(webpage),
+ 'description': strip_or_none(self._og_search_description(webpage)),
+ })
+ return info
+
+
+class SkySportsIE(SkyBaseIE):
+ IE_NAME = 'sky:sports'
+ _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/([^/]+/)*(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
+ 'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec',
+ 'info_dict': {
+ 'id': 'o3eWJnNDE6l7kfNO8BOoBlRxXRQ4ANNQ',
+ 'ext': 'mp4',
+ 'title': 'Bale: It\'s our time to shine',
+ 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d',
+ },
+ 'add_ie': ['Ooyala'],
+ }, {
+ 'url': 'https://www.skysports.com/watch/video/sports/f1/12160544/abu-dhabi-gp-the-notebook',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.skysports.com/watch/video/tv-shows/12118508/rainford-brent-how-ace-programme-helps',
+ 'only_matching': True,
+ }]
+
+
+class SkyNewsIE(SkyBaseIE):
+ IE_NAME = 'sky:news'
+ _VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962',
+ 'md5': '411e8893fd216c75eaf7e4c65d364115',
+ 'info_dict': {
+ 'id': 'ref:1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM',
+ 'ext': 'mp4',
+ 'title': 'Russian plane inspected after deadly fire',
+ 'description': 'The Russian Investigative Committee has released video of the wreckage of a passenger plane which caught fire near Moscow.',
+ 'uploader_id': '6058004172001',
+ 'timestamp': 1567112345,
+ 'upload_date': '20190829',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }
+
+
+class SkySportsNewsIE(SkyBaseIE):
+ IE_NAME = 'sky:sports:news'
+ _VALID_URL = r'https?://(?:www\.)?skysports\.com/([^/]+/)*news/\d+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.skysports.com/golf/news/12176/10871916/dustin-johnson-ready-to-conquer-players-championship-at-tpc-sawgrass',
+ 'info_dict': {
+ 'id': '10871916',
+ 'title': 'Dustin Johnson ready to conquer Players Championship at TPC Sawgrass',
+ 'description': 'Dustin Johnson is confident he can continue his dominant form in 2017 by adding the Players Championship to his list of victories.',
+ },
+ 'playlist_count': 2,
+ }
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+
+ entries = []
+ for sdc_el in re.findall(self._SDC_EL_REGEX, webpage):
+ entries.append(self._process_ooyala_element(webpage, sdc_el, url))
+
+ return self.playlist_result(
+ entries, article_id, self._og_search_title(webpage),
+ self._html_search_meta(['og:description', 'description'], webpage))
diff --git a/hypervideo_dl/extractor/skyit.py b/hypervideo_dl/extractor/skyit.py
new file mode 100644
index 0000000..14a4d8d
--- /dev/null
+++ b/hypervideo_dl/extractor/skyit.py
@@ -0,0 +1,239 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ dict_get,
+ int_or_none,
+ parse_duration,
+ unified_timestamp,
+)
+
+
+class SkyItPlayerIE(InfoExtractor):
+ IE_NAME = 'player.sky.it'
+ _VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
+ _GEO_BYPASS = False
+ _DOMAIN = 'sky'
+ _PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s'
+ # http://static.sky.it/static/skyplayer/conf.json
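+ # per-domain player tokens, presumably taken from the public player
+ # configuration referenced above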
+ _TOKEN_MAP = {
+ 'cielo': 'Hh9O7M8ks5yi6nSROL7bKYz933rdf3GhwZlTLMgvy4Q',
+ 'hotclub': 'kW020K2jq2lk2eKRJD2vWEg832ncx2EivZlTLQput2C',
+ 'mtv8': 'A5Nn9GGb326CI7vP5e27d7E4PIaQjota',
+ 'salesforce': 'C6D585FD1615272C98DE38235F38BD86',
+ 'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE',
+ 'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk',
+ 'skyacademy': 'A6LAn7EkO2Q26FRy0IAMBekX6jzDXYL3',
+ 'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd',
+ 'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp',
+ }
+
+ def _player_url_result(self, video_id):
+ return self.url_result(
+ self._PLAYER_TMPL % (video_id, self._DOMAIN),
+ SkyItPlayerIE.ie_key(), video_id)
+
+ def _parse_video(self, video, video_id):
+ title = video['title']
+ is_live = video.get('type') == 'live'
+ hls_url = video.get(('streaming' if is_live else 'hls') + '_url')
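+ # a missing stream URL combined with a geoblock flag means the video
+ # is restricted to Italy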
+ if not hls_url and video.get('geoblock' if is_live else 'geob'):
+ self.raise_geo_restricted(countries=['IT'])
+
+ if is_live:
+ formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
+ else:
+ formats = self._extract_akamai_formats(
+ hls_url, video_id, {'http': 'videoplatform.sky.it'})
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'formats': formats,
+ 'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')),
+ 'description': video.get('short_desc') or None,
+ 'timestamp': unified_timestamp(video.get('create_date')),
+ 'duration': int_or_none(video.get('duration_sec')) or parse_duration(video.get('duration')),
+ 'is_live': is_live,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ domain = compat_parse_qs(compat_urllib_parse_urlparse(
+ url).query).get('domain', [None])[0]
+ token = dict_get(self._TOKEN_MAP, (domain, 'sky'))
+ video = self._download_json(
+ 'https://apid.sky.it/vdp/v1/getVideoData',
+ video_id, query={
+ 'caller': 'sky',
+ 'id': video_id,
+ 'token': token
+ }, headers=self.geo_verification_headers())
+ return self._parse_video(video, video_id)
+
+
+class SkyItVideoIE(SkyItPlayerIE):
+ IE_NAME = 'video.sky.it'
+ _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227',
+ 'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
+ 'info_dict': {
+ 'id': '631227',
+ 'ext': 'mp4',
+ 'title': 'Uomo ucciso da uno squalo in Australia',
+ 'timestamp': 1606036192,
+ 'upload_date': '20201122',
+ }
+ }, {
+ 'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://masterchef.sky.it/video/masterchef-9-cosa-e-successo-nella-prima-puntata-562831',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self._player_url_result(video_id)
+
+
+class SkyItVideoLiveIE(SkyItPlayerIE):
+ IE_NAME = 'video.sky.it:live'
+ _VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://video.sky.it/diretta/tg24',
+ 'info_dict': {
+ 'id': '1',
+ 'ext': 'mp4',
+ 'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'description': 'Guarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24.',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
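+ # the live asset id is embedded in the page's Next.js __NEXT_DATA__
+ # state blob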
+ asset_id = compat_str(self._parse_json(self._search_regex(
+ r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
+ webpage, 'next data'), display_id)['props']['initialState']['livePage']['content']['asset_id'])
+ livestream = self._download_json(
+ 'https://apid.sky.it/vdp/v1/getLivestream',
+ asset_id, query={'id': asset_id})
+ return self._parse_video(livestream, asset_id)
+
+
+class SkyItIE(SkyItPlayerIE):
+ IE_NAME = 'sky.it'
+ _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://sport.sky.it/calcio/serie-a/2020/11/21/juventus-cagliari-risultato-gol',
+ 'info_dict': {
+ 'id': '631201',
+ 'ext': 'mp4',
+ 'title': 'Un rosso alla violenza: in campo per i diritti delle donne',
+ 'upload_date': '20201121',
+ 'timestamp': 1605995753,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo',
+ 'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
+ 'info_dict': {
+ 'id': '631227',
+ 'ext': 'mp4',
+ 'title': 'Uomo ucciso da uno squalo in Australia',
+ 'timestamp': 1606036192,
+ 'upload_date': '20201122',
+ },
+ }]
+ _VIDEO_ID_REGEX = r'data-videoid="(\d+)"'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ self._VIDEO_ID_REGEX, webpage, 'video id')
+ return self._player_url_result(video_id)
+
+
+class SkyItAcademyIE(SkyItIE):
+ IE_NAME = 'skyacademy.it'
+ _VALID_URL = r'https?://(?:www\.)?skyacademy\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.skyacademy.it/eventi-speciali/2019/07/05/a-lezione-di-cinema-con-sky-academy-/',
+ 'md5': 'ced5c26638b7863190cbc44dd6f6ba08',
+ 'info_dict': {
+ 'id': '523458',
+ 'ext': 'mp4',
+ 'title': 'Sky Academy "The Best CineCamp 2019"',
+ 'timestamp': 1562843784,
+ 'upload_date': '20190711',
+ }
+ }]
+ _DOMAIN = 'skyacademy'
+ _VIDEO_ID_REGEX = r'id="news-videoId_(\d+)"'
+
+
+class SkyItArteIE(SkyItIE):
+ IE_NAME = 'arte.sky.it'
+ _VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://arte.sky.it/video/serie-musei-venezia-collezionismo-12-novembre/',
+ 'md5': '515aee97b87d7a018b6c80727d3e7e17',
+ 'info_dict': {
+ 'id': '627926',
+ 'ext': 'mp4',
+ 'title': "Musei Galleria Franchetti alla Ca' d'Oro Palazzo Grimani",
+ 'upload_date': '20201106',
+ 'timestamp': 1604664493,
+ }
+ }]
+ _DOMAIN = 'skyarte'
+ _VIDEO_ID_REGEX = r'(?s)<iframe[^>]+src="(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)'
+
+
+class CieloTVItIE(SkyItIE):
+ IE_NAME = 'cielotv.it'
+ _VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P<id>[^.]+)\.html'
+ _TESTS = [{
+ 'url': 'https://www.cielotv.it/video/Il-lunedi-e-sempre-un-dramma.html',
+ 'md5': 'c4deed77552ba901c2a0d9258320304b',
+ 'info_dict': {
+ 'id': '499240',
+ 'ext': 'mp4',
+ 'title': 'Il lunedì è sempre un dramma',
+ 'upload_date': '20190329',
+ 'timestamp': 1553862178,
+ }
+ }]
+ _DOMAIN = 'cielo'
+ _VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"'
+
+
+class TV8ItIE(SkyItVideoIE):
+ IE_NAME = 'tv8.it'
+ _VALID_URL = r'https?://tv8\.it/showvideo/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://tv8.it/showvideo/630529/ogni-mattina-ucciso-asino-di-andrea-lo-cicero/18-11-2020/',
+ 'md5': '9ab906a3f75ea342ed928442f9dabd21',
+ 'info_dict': {
+ 'id': '630529',
+ 'ext': 'mp4',
+ 'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero',
+ 'timestamp': 1605721374,
+ 'upload_date': '20201118',
+ }
+ }]
+ _DOMAIN = 'mtv8'
diff --git a/hypervideo_dl/extractor/skylinewebcams.py b/hypervideo_dl/extractor/skylinewebcams.py
new file mode 100644
index 0000000..b7f8ac7
--- /dev/null
+++ b/hypervideo_dl/extractor/skylinewebcams.py
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class SkylineWebcamsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?skylinewebcams\.com/[^/]+/webcam/(?:[^/]+/)+(?P<id>[^/]+)\.html'
+ _TEST = {
+ 'url': 'https://www.skylinewebcams.com/it/webcam/italia/lazio/roma/scalinata-piazza-di-spagna-barcaccia.html',
+ 'info_dict': {
+ 'id': 'scalinata-piazza-di-spagna-barcaccia',
+ 'ext': 'mp4',
+ 'title': 're:^Live Webcam Scalinata di Piazza di Spagna - La Barcaccia [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'Roma, veduta sulla Scalinata di Piazza di Spagna e sulla Barcaccia',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ stream_url = self._search_regex(
+ r'(?:url|source)\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage,
+ 'stream url', group='url')
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+
+ return {
+ 'id': video_id,
+ 'url': stream_url,
+ 'ext': 'mp4',
+ 'title': self._live_title(title),
+ 'description': description,
+ 'is_live': True,
+ }
diff --git a/hypervideo_dl/extractor/skynewsarabia.py b/hypervideo_dl/extractor/skynewsarabia.py
new file mode 100644
index 0000000..fffc9aa
--- /dev/null
+++ b/hypervideo_dl/extractor/skynewsarabia.py
@@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ parse_iso8601,
+ parse_duration,
+)
+
+
+class SkyNewsArabiaBaseIE(InfoExtractor):
+ _IMAGE_BASE_URL = 'http://www.skynewsarabia.com/web/images'
+
+ def _call_api(self, path, value):
+ return self._download_json('http://api.skynewsarabia.com/web/rest/v2/%s/%s.json' % (path, value), value)
+
+ def _get_limelight_media_id(self, url):
+ return self._search_regex(r'/media/[^/]+/([a-z0-9]{32})', url, 'limelight media id')
+
+ def _get_image_url(self, image_path_template, width='1600', height='1200'):
+ return self._IMAGE_BASE_URL + image_path_template.format(width=width, height=height)
+
+ def _extract_video_info(self, video_data):
+ video_id = compat_str(video_data['id'])
+ topic = video_data.get('topicTitle')
+ return {
+ '_type': 'url_transparent',
+ 'url': 'limelight:media:%s' % self._get_limelight_media_id(video_data['videoUrl'][0]['url']),
+ 'id': video_id,
+ 'title': video_data['headline'],
+ 'description': video_data.get('summary'),
+ 'thumbnail': self._get_image_url(video_data['mediaAsset']['imageUrl']),
+ 'timestamp': parse_iso8601(video_data.get('date')),
+ 'duration': parse_duration(video_data.get('runTime')),
+ 'tags': video_data.get('tags', []),
+ 'categories': [topic] if topic else [],
+ 'webpage_url': 'http://www.skynewsarabia.com/web/video/%s' % video_id,
+ 'ie_key': 'LimelightMedia',
+ }
+
+
+class SkyNewsArabiaIE(SkyNewsArabiaBaseIE):
+ IE_NAME = 'skynewsarabia:video'
+ _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.skynewsarabia.com/web/video/794902/%D9%86%D8%B5%D9%81-%D9%85%D9%84%D9%8A%D9%88%D9%86-%D9%85%D8%B5%D8%A8%D8%A7%D8%AD-%D8%B4%D8%AC%D8%B1%D8%A9-%D9%83%D8%B1%D9%8A%D8%B3%D9%85%D8%A7%D8%B3',
+ 'info_dict': {
+ 'id': '794902',
+ 'ext': 'flv',
+ 'title': 'نصف مليون مصباح على شجرة كريسماس',
+ 'description': 'md5:22f1b27f0850eeb10c7e59b1f16eb7c6',
+ 'upload_date': '20151128',
+ 'timestamp': 1448697198,
+ 'duration': 2119,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._call_api('video', video_id)
+ return self._extract_video_info(video_data)
+
+
+class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE):
+ IE_NAME = 'skynewsarabia:article'
+ _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.skynewsarabia.com/web/article/794549/%D8%A7%D9%94%D8%AD%D8%AF%D8%A7%D8%AB-%D8%A7%D9%84%D8%B4%D8%B1%D9%82-%D8%A7%D9%84%D8%A7%D9%94%D9%88%D8%B3%D8%B7-%D8%AE%D8%B1%D9%8A%D8%B7%D8%A9-%D8%A7%D9%84%D8%A7%D9%94%D9%84%D8%B9%D8%A7%D8%A8-%D8%A7%D9%84%D8%B0%D9%83%D9%8A%D8%A9',
+ 'info_dict': {
+ 'id': '794549',
+ 'ext': 'flv',
+ 'title': 'بالفيديو.. ألعاب ذكية تحاكي واقع المنطقة',
+ 'description': 'md5:0c373d29919a851e080ee4edd0c5d97f',
+ 'upload_date': '20151126',
+ 'timestamp': 1448559336,
+ 'duration': 281.6,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.skynewsarabia.com/web/article/794844/%D8%A7%D8%B3%D8%AA%D9%87%D8%AF%D8%A7%D9%81-%D9%82%D9%88%D8%A7%D8%B1%D8%A8-%D8%A7%D9%94%D8%B3%D9%84%D8%AD%D8%A9-%D9%84%D9%85%D9%8A%D9%84%D9%8A%D8%B4%D9%8A%D8%A7%D8%AA-%D8%A7%D9%84%D8%AD%D9%88%D8%AB%D9%8A-%D9%88%D8%B5%D8%A7%D9%84%D8%AD',
+ 'info_dict': {
+ 'id': '794844',
+ 'title': 'إحباط تهريب أسلحة لميليشيات الحوثي وصالح بجنوب اليمن',
+ 'description': 'md5:5c927b8b2e805796e7f693538d96fc7e',
+ },
+ 'playlist_mincount': 2,
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ article_data = self._call_api('article', article_id)
+ media_asset = article_data['mediaAsset']
+ if media_asset['type'] == 'VIDEO':
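+ # the API lists both HLS and legacy RTMP endpoints; build formats
+ # for each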
+ topic = article_data.get('topicTitle')
+ return {
+ '_type': 'url_transparent',
+ 'url': 'limelight:media:%s' % self._get_limelight_media_id(media_asset['videoUrl'][0]['url']),
+ 'id': article_id,
+ 'title': article_data['headline'],
+ 'description': article_data.get('summary'),
+ 'thumbnail': self._get_image_url(media_asset['imageUrl']),
+ 'timestamp': parse_iso8601(article_data.get('date')),
+ 'tags': article_data.get('tags', []),
+ 'categories': [topic] if topic else [],
+ 'webpage_url': url,
+ 'ie_key': 'LimelightMedia',
+ }
+ entries = [self._extract_video_info(item) for item in article_data.get('inlineItems', []) if item['type'] == 'VIDEO']
+ return self.playlist_result(entries, article_id, article_data['headline'], article_data.get('summary'))
diff --git a/hypervideo_dl/extractor/slideshare.py b/hypervideo_dl/extractor/slideshare.py
new file mode 100644
index 0000000..e89ebeb
--- /dev/null
+++ b/hypervideo_dl/extractor/slideshare.py
@@ -0,0 +1,56 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ get_element_by_id,
+)
+
+
+class SlideshareIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
+
+ _TEST = {
+ 'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
+ 'info_dict': {
+ 'id': '25665706',
+ 'ext': 'mp4',
+ 'title': 'Managing Scale and Complexity',
+ 'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ page_title = mobj.group('title')
+ webpage = self._download_webpage(url, page_title)
+ slideshare_obj = self._search_regex(
+ r'\$\.extend\(.*?slideshare_object,\s*(\{.*?\})\);',
+ webpage, 'slideshare object')
+ info = json.loads(slideshare_obj)
+ if info['slideshow']['type'] != 'video':
+ raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
+
+ doc = info['doc']
+ bucket = info['jsplayer']['video_bucket']
+ ext = info['jsplayer']['video_extension']
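+ # the SD rendition lives at <video_bucket>/<doc>-SD.<ext>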
+ video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
+ description = get_element_by_id('slideshow-description-paragraph', webpage) or self._html_search_regex(
+ r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage,
+ 'description', fatal=False)
+
+ return {
+ '_type': 'video',
+ 'id': info['slideshow']['id'],
+ 'title': info['slideshow']['title'],
+ 'ext': ext,
+ 'url': video_url,
+ 'thumbnail': info['slideshow']['pin_image_url'],
+ 'description': description.strip() if description else None,
+ }
diff --git a/hypervideo_dl/extractor/slideslive.py b/hypervideo_dl/extractor/slideslive.py
new file mode 100644
index 0000000..9409a01
--- /dev/null
+++ b/hypervideo_dl/extractor/slideslive.py
@@ -0,0 +1,109 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ bool_or_none,
+ smuggle_url,
+ try_get,
+ url_or_none,
+)
+
+
+class SlidesLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
+ _TESTS = [{
+ # video_service_name = YOUTUBE
+ 'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
+ 'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f',
+ 'info_dict': {
+ 'id': 'LMtgR8ba0b0',
+ 'ext': 'mp4',
+ 'title': 'GCC IA16 backend',
+ 'description': 'Watch full version of this video at https://slideslive.com/38902413.',
+ 'uploader': 'SlidesLive Videos - A',
+ 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
+ 'timestamp': 1597615266,
+ 'upload_date': '20170925',
+ }
+ }, {
+ # video_service_name = yoda
+ 'url': 'https://slideslive.com/38935785',
+ 'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a',
+ 'info_dict': {
+ 'id': 'RMraDYN5ozA_',
+ 'ext': 'mp4',
+ 'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }, {
+ # video_service_name = youtube
+ 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
+ 'only_matching': True,
+ }, {
+ # video_service_name = url
+ 'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
+ 'only_matching': True,
+ }, {
+ # video_service_name = vimeo
+ 'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'https://ben.slideslive.com/player/' + video_id, video_id)
+ service_name = video_data['video_service_name'].lower()
+ assert service_name in ('url', 'yoda', 'vimeo', 'youtube')
+ service_id = video_data['video_service_id']
+ subtitles = {}
+ for sub in try_get(video_data, lambda x: x['subtitles'], list) or []:
+ if not isinstance(sub, dict):
+ continue
+ webvtt_url = url_or_none(sub.get('webvtt_url'))
+ if not webvtt_url:
+ continue
+ lang = sub.get('language') or 'en'
+ subtitles.setdefault(lang, []).append({
+ 'url': webvtt_url,
+ })
+ info = {
+ 'id': video_id,
+ 'thumbnail': video_data.get('thumbnail'),
+ 'is_live': bool_or_none(video_data.get('is_live')),
+ 'subtitles': subtitles,
+ }
+ if service_name in ('url', 'yoda'):
+ info['title'] = video_data['title']
+ if service_name == 'url':
+ info['url'] = service_id
+ else:
+ formats = []
+ _MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s'
+ # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
+ formats.extend(self._extract_m3u8_formats(
+ _MANIFEST_PATTERN % (service_id, 'm3u8'),
+ service_id, 'mp4', m3u8_id='hls', fatal=False))
+ formats.extend(self._extract_mpd_formats(
+ _MANIFEST_PATTERN % (service_id, 'mpd'), service_id,
+ mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
+ info.update({
+ 'id': service_id,
+ 'formats': formats,
+ })
+ else:
+ info.update({
+ '_type': 'url_transparent',
+ 'url': service_id,
+ 'ie_key': service_name.capitalize(),
+ 'title': video_data.get('title'),
+ })
+ if service_name == 'vimeo':
+ info['url'] = smuggle_url(
+ 'https://player.vimeo.com/video/' + service_id,
+ {'http_headers': {'Referer': url}})
+ return info
diff --git a/hypervideo_dl/extractor/slutload.py b/hypervideo_dl/extractor/slutload.py
new file mode 100644
index 0000000..661f9e5
--- /dev/null
+++ b/hypervideo_dl/extractor/slutload.py
@@ -0,0 +1,65 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class SlutloadIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)?slutload\.com/(?:video/[^/]+|embed_player|watch)/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
+ 'md5': '868309628ba00fd488cf516a113fd717',
+ 'info_dict': {
+ 'id': 'TD73btpBqSxc',
+ 'ext': 'mp4',
+ 'title': 'virginie baisee en cam',
+ 'age_limit': 18,
+ 'thumbnail': r're:https?://.*?\.jpg'
+ },
+ }, {
+ # mobile site
+ 'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.slutload.com/embed_player/TD73btpBqSxc/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.slutload.com/watch/TD73btpBqSxc/Virginie-Baisee-En-Cam.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
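+ # try the lightweight embed player first and fall back to the full
+ # video page below if it is unavailable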
+ embed_page = self._download_webpage(
+ 'http://www.slutload.com/embed_player/%s' % video_id, video_id,
+ 'Downloading embed page', fatal=False)
+
+ if embed_page:
+ def extract(what):
+ return self._html_search_regex(
+ r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what,
+ embed_page, 'video %s' % what, default=None, group='url')
+
+ video_url = extract('url')
+ if video_url:
+ title = self._html_search_regex(
+ r'<title>([^<]+)', embed_page, 'title', default=video_id)
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': extract('preview'),
+ 'age_limit': 18
+ }
+
+ webpage = self._download_webpage(
+ 'http://www.slutload.com/video/_/%s/' % video_id, video_id)
+ title = self._html_search_regex(
+ r'<h1><strong>([^<]+)</strong>', webpage, 'title').strip()
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'age_limit': 18,
+ })
+ return info
diff --git a/hypervideo_dl/extractor/snotr.py b/hypervideo_dl/extractor/snotr.py
new file mode 100644
index 0000000..f773547
--- /dev/null
+++ b/hypervideo_dl/extractor/snotr.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_filesize,
+ str_to_int,
+)
+
+
+class SnotrIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/(\w+)'
+ _TESTS = [{
+ 'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
+ 'info_dict': {
+ 'id': '13708',
+ 'ext': 'mp4',
+ 'title': 'Drone flying through fireworks!',
+ 'duration': 248,
+ 'filesize_approx': 40700000,
+ 'description': 'A drone flying through Fourth of July Fireworks',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'expected_warnings': ['description'],
+ }, {
+ 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
+ 'info_dict': {
+ 'id': '530',
+ 'ext': 'mp4',
+ 'title': 'David Letteman - George W. Bush Top 10',
+ 'duration': 126,
+ 'filesize_approx': 8500000,
+ 'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(webpage)
+
+ description = self._og_search_description(webpage)
+ info_dict = self._parse_html5_media_entries(
+ url, webpage, video_id, m3u8_entry_protocol='m3u8_native')[0]
+
+ view_count = str_to_int(self._html_search_regex(
+ r'<p[^>]*>\s*<strong[^>]*>Views:</strong>\s*<span[^>]*>([\d,\.]+)',
+ webpage, 'view count', fatal=False))
+
+ duration = parse_duration(self._html_search_regex(
+ r'<p[^>]*>\s*<strong[^>]*>Length:</strong>\s*<span[^>]*>([\d:]+)',
+ webpage, 'duration', fatal=False))
+
+ filesize_approx = parse_filesize(self._html_search_regex(
+ r'<p[^>]*>\s*<strong[^>]*>Filesize:</strong>\s*<span[^>]*>([^<]+)',
+ webpage, 'filesize', fatal=False))
+
+ info_dict.update({
+ 'id': video_id,
+ 'description': description,
+ 'title': title,
+ 'view_count': view_count,
+ 'duration': duration,
+ 'filesize_approx': filesize_approx,
+ })
+
+ return info_dict
diff --git a/hypervideo_dl/extractor/sohu.py b/hypervideo_dl/extractor/sohu.py
new file mode 100644
index 0000000..9d73650
--- /dev/null
+++ b/hypervideo_dl/extractor/sohu.py
@@ -0,0 +1,202 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_urlencode,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+)
+
+
+class SohuIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
+
+ # Sohu videos give different MD5 sums on Travis CI and my machine
+ _TESTS = [{
+ 'note': 'This video is available only in Mainland China',
+ 'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
+ 'info_dict': {
+ 'id': '382479172',
+ 'ext': 'mp4',
+ 'title': 'MV:Far East Movement《The Illest》',
+ },
+ 'skip': 'Only available in Mainland China',
+ }, {
+ 'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
+ 'info_dict': {
+ 'id': '409385080',
+ 'ext': 'mp4',
+ 'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
+ }
+ }, {
+ 'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
+ 'info_dict': {
+ 'id': '78693464',
+ 'ext': 'mp4',
+ 'title': '【爱范品】第31期:MWC见不到的奇葩手机',
+ }
+ }, {
+ 'note': 'Multipart video',
+ 'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml',
+ 'info_dict': {
+ 'id': '78910339',
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '78910339_part1',
+ 'ext': 'mp4',
+ 'duration': 294,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '78910339_part2',
+ 'ext': 'mp4',
+ 'duration': 300,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '78910339_part3',
+ 'ext': 'mp4',
+ 'duration': 150,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }]
+ }, {
+ 'note': 'Video with title containing dash',
+ 'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml',
+ 'info_dict': {
+ 'id': '78932792',
+ 'ext': 'mp4',
+ 'title': 'hypervideo testing video',
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }]
+
+ def _real_extract(self, url):
+
+ def _fetch_data(vid_id, mytv=False):
+ if mytv:
+ base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
+ else:
+ base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+
+ return self._download_json(
+ base_data_url + vid_id, video_id,
+ 'Downloading JSON data for %s' % vid_id,
+ headers=self.geo_verification_headers())
+
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ mytv = mobj.group('mytv') is not None
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = re.sub(r' - 搜狐视频$', '', self._og_search_title(webpage))
+
+ vid = self._html_search_regex(
+ r'var vid ?= ?["\'](\d+)["\']',
+ webpage, 'video path')
+ vid_data = _fetch_data(vid, mytv)
+ if vid_data['play'] != 1:
+ if vid_data.get('status') == 12:
+ raise ExtractorError(
+ '%s said: There\'s something wrong with the video.' % self.IE_NAME,
+ expected=True)
+ else:
+ self.raise_geo_restricted(
+ '%s said: The video is only licensed to users in Mainland China.' % self.IE_NAME)
+
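+ # Each quality level ('nor', 'high', 'super', ...) has its own vid;
+ # fetch per-quality metadata for every one that is present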
+ formats_json = {}
+ for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
+ vid_id = vid_data['data'].get('%sVid' % format_id)
+ if not vid_id:
+ continue
+ vid_id = compat_str(vid_id)
+ formats_json[format_id] = vid_data if vid == vid_id else _fetch_data(vid_id, mytv)
+
+ part_count = vid_data['data']['totalBlocks']
+
+ playlist = []
+ for i in range(part_count):
+ formats = []
+ for format_id, format_data in formats_json.items():
+ allot = format_data['allot']
+
+ data = format_data['data']
+ clips_url = data['clipsURL']
+ su = data['su']
+
+ video_url = 'newflv.sohu.ccgslb.net'
+ cdnId = None
+ retries = 0
+
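+ # The API may first answer with the placeholder host newflv.sohu.ccgslb.net;
+ # keep re-requesting, passing back the reported CDN id, up to 5 retries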
+ while 'newflv.sohu.ccgslb.net' in video_url:
+ params = {
+ 'prot': 9,
+ 'file': clips_url[i],
+ 'new': su[i],
+ 'prod': 'flash',
+ 'rb': 1,
+ }
+
+ if cdnId is not None:
+ params['idc'] = cdnId
+
+ download_note = 'Downloading %s video URL part %d of %d' % (
+ format_id, i + 1, part_count)
+
+ if retries > 0:
+ download_note += ' (retry #%d)' % retries
+ part_info = self._parse_json(self._download_webpage(
+ 'http://%s/?%s' % (allot, compat_urllib_parse_urlencode(params)),
+ video_id, download_note), video_id)
+
+ video_url = part_info['url']
+ cdnId = part_info.get('nid')
+
+ retries += 1
+ if retries > 5:
+ raise ExtractorError('Failed to get video URL')
+
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'filesize': int_or_none(
+ try_get(data, lambda x: x['clipsBytes'][i])),
+ 'width': int_or_none(data.get('width')),
+ 'height': int_or_none(data.get('height')),
+ 'fps': int_or_none(data.get('fps')),
+ })
+ self._sort_formats(formats)
+
+ playlist.append({
+ 'id': '%s_part%d' % (video_id, i + 1),
+ 'title': title,
+ 'duration': vid_data['data']['clipsDuration'][i],
+ 'formats': formats,
+ })
+
+ if len(playlist) == 1:
+ info = playlist[0]
+ info['id'] = video_id
+ else:
+ info = {
+ '_type': 'multi_video',
+ 'entries': playlist,
+ 'id': video_id,
+ 'title': title,
+ }
+
+ return info
diff --git a/hypervideo_dl/extractor/sonyliv.py b/hypervideo_dl/extractor/sonyliv.py
new file mode 100644
index 0000000..fedfceb
--- /dev/null
+++ b/hypervideo_dl/extractor/sonyliv.py
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import time
+import uuid
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class SonyLIVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
+ 'info_dict': {
+ 'title': 'Bachelors Delight - Achaari Cheese Toast',
+ 'id': '1000022678',
+ 'ext': 'mp4',
+ 'upload_date': '20200411',
+ 'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb',
+ 'timestamp': 1586632091,
+ 'duration': 185,
+ 'season_number': 1,
+ 'episode': 'Achaari Cheese Toast',
+ 'episode_number': 1,
+ 'release_year': 2016,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779',
+ 'only_matching': True,
+ }]
+ _GEO_COUNTRIES = ['IN']
+ _TOKEN = None
+
+ def _call_api(self, version, path, video_id):
+ headers = {}
+ if self._TOKEN:
+ headers['security_token'] = self._TOKEN
+ try:
+ return self._download_json(
+ 'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path),
+ video_id, headers=headers)['resultObj']
+ except ExtractorError as e:
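+ # The API reports geo-blocking as a 403 whose JSON message is 'Geoblocked Country'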
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ message = self._parse_json(
+ e.cause.read().decode(), video_id)['message']
+ if message == 'Geoblocked Country':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ raise ExtractorError(message)
+ raise
+
+ def _real_initialize(self):
+ self._TOKEN = self._call_api('1.4', 'ALL/GETTOKEN', None)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ content = self._call_api(
+ '1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
+ if content.get('isEncrypted'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+ dash_url = content['videoURL']
+ headers = {
+ 'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)
+ }
+ formats = self._extract_mpd_formats(
+ dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
+ formats.extend(self._extract_m3u8_formats(
+ dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'),
+ video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
+ for f in formats:
+ f.setdefault('http_headers', {}).update(headers)
+ self._sort_formats(formats)
+
+ metadata = self._call_api(
+ '1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
+ title = metadata['title']
+ episode = metadata.get('episodeTitle')
+ if episode and title != episode:
+ title += ' - ' + episode
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': content.get('posterURL'),
+ 'description': metadata.get('longDescription') or metadata.get('shortDescription'),
+ 'timestamp': int_or_none(metadata.get('creationDate'), 1000),
+ 'duration': int_or_none(metadata.get('duration')),
+ 'season_number': int_or_none(metadata.get('season')),
+ 'episode': episode,
+ 'episode_number': int_or_none(metadata.get('episodeNumber')),
+ 'release_year': int_or_none(metadata.get('year')),
+ }
diff --git a/hypervideo_dl/extractor/soundcloud.py b/hypervideo_dl/extractor/soundcloud.py
new file mode 100644
index 0000000..abb85e1
--- /dev/null
+++ b/hypervideo_dl/extractor/soundcloud.py
@@ -0,0 +1,815 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import (
+ InfoExtractor,
+ SearchInfoExtractor
+)
+from ..compat import (
+ compat_HTTPError,
+ compat_kwargs,
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ error_to_compat_str,
+ ExtractorError,
+ float_or_none,
+ HEADRequest,
+ int_or_none,
+ KNOWN_EXTENSIONS,
+ mimetype2ext,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ urlhandle_detect_ext,
+)
+
+
+class SoundcloudEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)'
+ _TEST = {
+ # from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/
+ 'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey',
+ 'only_matching': True,
+ }
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [m.group('url') for m in re.finditer(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ query = compat_urlparse.parse_qs(
+ compat_urlparse.urlparse(url).query)
+ api_url = query['url'][0]
+ secret_token = query.get('secret_token')
+ if secret_token:
+ api_url = update_url_query(api_url, {'secret_token': secret_token[0]})
+ return self.url_result(api_url)
+
+
+class SoundcloudIE(InfoExtractor):
+ """Information extractor for soundcloud.com
+ To access the media, the uid of the song and a stream token
+ must be extracted from the page source and the script must make
+ a request to media.soundcloud.com/crossdomain.xml. Then
+ the media can be grabbed by requesting from a URL composed
+ of the stream token and uid.
+ """
+
+ _VALID_URL = r'''(?x)^(?:https?://)?
+ (?:(?:(?:www\.|m\.)?soundcloud\.com/
+ (?!stations/track)
+ (?P<uploader>[\w\d-]+)/
+ (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
+ (?P<title>[\w\d-]+)/?
+ (?P<token>[^?]+?)?(?:[?].*)?$)
+ |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+)
+ (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
+ )
+ '''
+ IE_NAME = 'soundcloud'
+ _TESTS = [
+ {
+ 'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
+ 'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
+ 'info_dict': {
+ 'id': '62986583',
+ 'ext': 'mp3',
+ 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
+ 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
+ 'uploader': 'E.T. ExTerrestrial Music',
+ 'uploader_id': '1571244',
+ 'timestamp': 1349920598,
+ 'upload_date': '20121011',
+ 'duration': 143.216,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ }
+ },
+ # geo-restricted
+ {
+ 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
+ 'info_dict': {
+ 'id': '47127627',
+ 'ext': 'mp3',
+ 'title': 'Goldrushed',
+ 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
+ 'uploader': 'The Royal Concept',
+ 'uploader_id': '9615865',
+ 'timestamp': 1337635207,
+ 'upload_date': '20120521',
+ 'duration': 227.155,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ # private link
+ {
+ 'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
+ 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
+ 'info_dict': {
+ 'id': '123998367',
+ 'ext': 'mp3',
+ 'title': 'Youtube - Dl Test Video \'\' Ä↭',
+ 'description': 'test chars: \"\'/\\ä↭',
+ 'uploader': 'jaimeMF',
+ 'uploader_id': '69767071',
+ 'timestamp': 1386604920,
+ 'upload_date': '20131209',
+ 'duration': 9.927,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ # private link (alt format)
+ {
+ 'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
+ 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
+ 'info_dict': {
+ 'id': '123998367',
+ 'ext': 'mp3',
+ 'title': 'Youtube - Dl Test Video \'\' Ä↭',
+ 'description': 'test chars: \"\'/\\ä↭',
+ 'uploader': 'jaimeMF',
+ 'uploader_id': '69767071',
+ 'timestamp': 1386604920,
+ 'upload_date': '20131209',
+ 'duration': 9.927,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ # downloadable song
+ {
+ 'url': 'https://soundcloud.com/oddsamples/bus-brakes',
+ 'md5': '7624f2351f8a3b2e7cd51522496e7631',
+ 'info_dict': {
+ 'id': '128590877',
+ 'ext': 'mp3',
+ 'title': 'Bus Brakes',
+ 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
+ 'uploader': 'oddsamples',
+ 'uploader_id': '73680509',
+ 'timestamp': 1389232924,
+ 'upload_date': '20140109',
+ 'duration': 17.346,
+ 'license': 'cc-by-sa',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ # private link, downloadable format
+ {
+ 'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
+ 'md5': '64a60b16e617d41d0bef032b7f55441e',
+ 'info_dict': {
+ 'id': '340344461',
+ 'ext': 'wav',
+ 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
+ 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
+ 'uploader': 'Ori Uplift Music',
+ 'uploader_id': '12563093',
+ 'timestamp': 1504206263,
+ 'upload_date': '20170831',
+ 'duration': 7449.096,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ # no album art, use avatar pic for thumbnail
+ {
+ 'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real',
+ 'md5': '59c7872bc44e5d99b7211891664760c2',
+ 'info_dict': {
+ 'id': '309699954',
+ 'ext': 'mp3',
+ 'title': 'Sideways (Prod. Mad Real)',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'uploader': 'garyvee',
+ 'uploader_id': '2366352',
+ 'timestamp': 1488152409,
+ 'upload_date': '20170226',
+ 'duration': 207.012,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
+ 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
+ 'info_dict': {
+ 'id': '583011102',
+ 'ext': 'mp3',
+ 'title': 'Mezzo Valzer',
+ 'description': 'md5:4138d582f81866a530317bae316e8b61',
+ 'uploader': 'Micronie',
+ 'uploader_id': '3352531',
+ 'timestamp': 1551394171,
+ 'upload_date': '20190228',
+ 'duration': 180.157,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ {
+ # with AAC HQ format available via OAuth token
+ 'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
+ 'only_matching': True,
+ },
+ ]
+
+ _API_V2_BASE = 'https://api-v2.soundcloud.com/'
+ _BASE_URL = 'https://soundcloud.com/'
+ _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
+
+ _ARTWORK_MAP = {
+ 'mini': 16,
+ 'tiny': 20,
+ 'small': 32,
+ 'badge': 47,
+ 't67x67': 67,
+ 'large': 100,
+ 't300x300': 300,
+ 'crop': 400,
+ 't500x500': 500,
+ 'original': 0,
+ }
+
+ def _store_client_id(self, client_id):
+ self._downloader.cache.store('soundcloud', 'client_id', client_id)
+
+ def _update_client_id(self):
+ webpage = self._download_webpage('https://soundcloud.com/', None)
+ for src in reversed(re.findall(r'<script[^>]+src="([^"]+)"', webpage)):
+ script = self._download_webpage(src, None, fatal=False)
+ if script:
+ client_id = self._search_regex(
+ r'client_id\s*:\s*"([0-9a-zA-Z]{32})"',
+ script, 'client id', default=None)
+ if client_id:
+ self._CLIENT_ID = client_id
+ self._store_client_id(client_id)
+ return
+ raise ExtractorError('Unable to extract client id')
+
+ def _download_json(self, *args, **kwargs):
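+ # Wraps InfoExtractor._download_json to inject the cached client_id
+ # and to refresh it once (single retry) on HTTP 401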
+ non_fatal = kwargs.get('fatal') is False
+ if non_fatal:
+ del kwargs['fatal']
+ query = kwargs.get('query', {}).copy()
+ for _ in range(2):
+ query['client_id'] = self._CLIENT_ID
+ kwargs['query'] = query
+ try:
+ return super(SoundcloudIE, self)._download_json(*args, **compat_kwargs(kwargs))
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ self._store_client_id(None)
+ self._update_client_id()
+ continue
+ elif non_fatal:
+ self._downloader.report_warning(error_to_compat_str(e))
+ return False
+ raise
+
+ def _real_initialize(self):
+ self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
+
+ @classmethod
+ def _resolv_url(cls, url):
+ return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url
+
+ def _extract_info_dict(self, info, full_title=None, secret_token=None):
+ track_id = compat_str(info['id'])
+ title = info['title']
+
+ format_urls = set()
+ formats = []
+ query = {'client_id': self._CLIENT_ID}
+ if secret_token:
+ query['secret_token'] = secret_token
+
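+ # Uploader-enabled downloads expose an original-quality file;
+ # resolve the redirect and prefer it over transcoded streams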
+ if info.get('downloadable') and info.get('has_downloads_left'):
+ download_url = update_url_query(
+ self._API_V2_BASE + 'tracks/' + track_id + '/download', query)
+ redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
+ if redirect_url:
+ urlh = self._request_webpage(
+ HEADRequest(redirect_url), track_id, fatal=False)
+ if urlh:
+ format_url = urlh.geturl()
+ format_urls.add(format_url)
+ formats.append({
+ 'format_id': 'download',
+ 'ext': urlhandle_detect_ext(urlh) or 'mp3',
+ 'filesize': int_or_none(urlh.headers.get('Content-Length')),
+ 'url': format_url,
+ 'preference': 10,
+ })
+
+ def invalid_url(url):
+ return not url or url in format_urls
+
+ def add_format(f, protocol, is_preview=False):
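+ # Fill in missing abr/ext from the stream URL, build a composite
+ # format_id and deprioritize 30-second previews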
+ mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
+ if mobj:
+ for k, v in mobj.groupdict().items():
+ if not f.get(k):
+ f[k] = v
+ format_id_list = []
+ if protocol:
+ format_id_list.append(protocol)
+ ext = f.get('ext')
+ if ext == 'aac':
+ f['abr'] = '256'
+ for k in ('ext', 'abr'):
+ v = f.get(k)
+ if v:
+ format_id_list.append(v)
+ preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
+ if preview:
+ format_id_list.append('preview')
+ abr = f.get('abr')
+ if abr:
+ f['abr'] = int(abr)
+ if protocol == 'hls':
+ protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
+ else:
+ protocol = 'http'
+ f.update({
+ 'format_id': '_'.join(format_id_list),
+ 'protocol': protocol,
+ 'preference': -10 if preview else None,
+ })
+ formats.append(f)
+
+ # New API
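+ # Each transcoding entry points at an endpoint that must be resolved
+ # to obtain the actual stream URL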
+ transcodings = try_get(
+ info, lambda x: x['media']['transcodings'], list) or []
+ for t in transcodings:
+ if not isinstance(t, dict):
+ continue
+ format_url = url_or_none(t.get('url'))
+ if not format_url:
+ continue
+ stream = self._download_json(
+ format_url, track_id, query=query, fatal=False)
+ if not isinstance(stream, dict):
+ continue
+ stream_url = url_or_none(stream.get('url'))
+ if invalid_url(stream_url):
+ continue
+ format_urls.add(stream_url)
+ stream_format = t.get('format') or {}
+ protocol = stream_format.get('protocol')
+ if protocol != 'hls' and '/hls' in format_url:
+ protocol = 'hls'
+ ext = None
+ preset = str_or_none(t.get('preset'))
+ if preset:
+ ext = preset.split('_')[0]
+ if ext not in KNOWN_EXTENSIONS:
+ ext = mimetype2ext(stream_format.get('mime_type'))
+ add_format({
+ 'url': stream_url,
+ 'ext': ext,
+ }, 'http' if protocol == 'progressive' else protocol,
+ t.get('snipped') or '/preview/' in format_url)
+
+ for f in formats:
+ f['vcodec'] = 'none'
+
+ if not formats and info.get('policy') == 'BLOCK':
+ self.raise_geo_restricted()
+ self._sort_formats(formats)
+
+ user = info.get('user') or {}
+
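+ # Artwork URLs embed a size token; substitute every known size from
+ # _ARTWORK_MAP to offer multiple thumbnail variants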
+ thumbnails = []
+ artwork_url = info.get('artwork_url')
+ thumbnail = artwork_url or user.get('avatar_url')
+ if isinstance(thumbnail, compat_str):
+ if re.search(self._IMAGE_REPL_RE, thumbnail):
+ for image_id, size in self._ARTWORK_MAP.items():
+ i = {
+ 'id': image_id,
+ 'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail),
+ }
+ if image_id == 'tiny' and not artwork_url:
+ size = 18
+ elif image_id == 'original':
+ i['preference'] = 10
+ if size:
+ i.update({
+ 'width': size,
+ 'height': size,
+ })
+ thumbnails.append(i)
+ else:
+ thumbnails = [{'url': thumbnail}]
+
+ def extract_count(key):
+ return int_or_none(info.get('%s_count' % key))
+
+ return {
+ 'id': track_id,
+ 'uploader': user.get('username'),
+ 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
+ 'uploader_url': user.get('permalink_url'),
+ 'timestamp': unified_timestamp(info.get('created_at')),
+ 'title': title,
+ 'description': info.get('description'),
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(info.get('duration'), 1000),
+ 'webpage_url': info.get('permalink_url'),
+ 'license': info.get('license'),
+ 'view_count': extract_count('playback'),
+ 'like_count': extract_count('favoritings') or extract_count('likes'),
+ 'comment_count': extract_count('comment'),
+ 'repost_count': extract_count('reposts'),
+ 'genre': info.get('genre'),
+ 'formats': formats
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ track_id = mobj.group('track_id')
+
+ query = {}
+ if track_id:
+ info_json_url = self._API_V2_BASE + 'tracks/' + track_id
+ full_title = track_id
+ token = mobj.group('secret_token')
+ if token:
+ query['secret_token'] = token
+ else:
+ full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title')
+ token = mobj.group('token')
+ if token:
+ resolve_title += '/%s' % token
+ info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
+
+ info = self._download_json(
+ info_json_url, full_title, 'Downloading info JSON', query=query)
+
+ return self._extract_info_dict(info, full_title, token)
+
+
+class SoundcloudPlaylistBaseIE(SoundcloudIE):
+ def _extract_set(self, playlist, token=None):
+ playlist_id = compat_str(playlist['id'])
+ tracks = playlist.get('tracks') or []
+ if not all(t.get('permalink_url') for t in tracks) and token:
+ tracks = self._download_json(
+ self._API_V2_BASE + 'tracks', playlist_id,
+ 'Downloading tracks', query={
+ 'ids': ','.join([compat_str(t['id']) for t in tracks]),
+ 'playlistId': playlist_id,
+ 'playlistSecretToken': token,
+ })
+ entries = []
+ for track in tracks:
+ track_id = str_or_none(track.get('id'))
+ url = track.get('permalink_url')
+ if not url:
+ if not track_id:
+ continue
+ url = self._API_V2_BASE + 'tracks/' + track_id
+ if token:
+ url += '?secret_token=' + token
+ entries.append(self.url_result(
+ url, SoundcloudIE.ie_key(), track_id))
+ return self.playlist_result(
+ entries, playlist_id,
+ playlist.get('title'),
+ playlist.get('description'))
+
+
+class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
+ IE_NAME = 'soundcloud:set'
+ _TESTS = [{
+ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
+ 'info_dict': {
+ 'id': '2284613',
+ 'title': 'The Royal Concept EP',
+ 'description': 'md5:71d07087c7a449e8941a70a29e34671e',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title')
+ token = mobj.group('token')
+ if token:
+ full_title += '/' + token
+
+ info = self._download_json(self._resolv_url(
+ self._BASE_URL + full_title), full_title)
+
+ if 'errors' in info:
+ msgs = (compat_str(err['error_message']) for err in info['errors'])
+ raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
+
+ return self._extract_set(info, token)
+
+
+class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
+ def _extract_playlist(self, base_url, playlist_id, playlist_title):
+ # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
+ # https://developers.soundcloud.com/blog/offset-pagination-deprecated
+ COMMON_QUERY = {
+ 'limit': 200,
+ 'linked_partitioning': '1',
+ }
+
+ query = COMMON_QUERY.copy()
+ query['offset'] = 0
+
+ next_href = base_url
+
+ entries = []
+ for i in itertools.count():
+ response = self._download_json(
+ next_href, playlist_id,
+ 'Downloading track page %s' % (i + 1), query=query)
+
+ collection = response['collection']
+
+ if not isinstance(collection, list):
+ collection = []
+
+ # Empty collection may be returned, in this case we proceed
+ # straight to next_href
+
+ def resolve_entry(candidates):
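+ # Prefer the collection entry itself, then its nested track/playlist,
+ # taking the first candidate with a permalink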
+ for cand in candidates:
+ if not isinstance(cand, dict):
+ continue
+ permalink_url = url_or_none(cand.get('permalink_url'))
+ if not permalink_url:
+ continue
+ return self.url_result(
+ permalink_url,
+ SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
+ str_or_none(cand.get('id')), cand.get('title'))
+
+ for e in collection:
+ entry = resolve_entry((e, e.get('track'), e.get('playlist')))
+ if entry:
+ entries.append(entry)
+
+ next_href = response.get('next_href')
+ if not next_href:
+ break
+
+ parsed_next_href = compat_urlparse.urlparse(next_href)
+ query = compat_urlparse.parse_qs(parsed_next_href.query)
+ query.update(COMMON_QUERY)
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': playlist_title,
+ 'entries': entries,
+ }
+
+
+class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|m)\.)?soundcloud\.com/
+ (?P<user>[^/]+)
+ (?:/
+ (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
+ )?
+ /?(?:[?#].*)?$
+ '''
+ IE_NAME = 'soundcloud:user'
+ _TESTS = [{
+ 'url': 'https://soundcloud.com/soft-cell-official',
+ 'info_dict': {
+ 'id': '207965082',
+ 'title': 'Soft Cell (All)',
+ },
+ 'playlist_mincount': 28,
+ }, {
+ 'url': 'https://soundcloud.com/soft-cell-official/tracks',
+ 'info_dict': {
+ 'id': '207965082',
+ 'title': 'Soft Cell (Tracks)',
+ },
+ 'playlist_mincount': 27,
+ }, {
+ 'url': 'https://soundcloud.com/soft-cell-official/albums',
+ 'info_dict': {
+ 'id': '207965082',
+ 'title': 'Soft Cell (Albums)',
+ },
+ 'playlist_mincount': 1,
+ }, {
+ 'url': 'https://soundcloud.com/jcv246/sets',
+ 'info_dict': {
+ 'id': '12982173',
+ 'title': 'Jordi / cv (Sets)',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://soundcloud.com/jcv246/reposts',
+ 'info_dict': {
+ 'id': '12982173',
+ 'title': 'Jordi / cv (Reposts)',
+ },
+ 'playlist_mincount': 6,
+ }, {
+ 'url': 'https://soundcloud.com/clalberg/likes',
+ 'info_dict': {
+ 'id': '11817582',
+ 'title': 'clalberg (Likes)',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ 'url': 'https://soundcloud.com/grynpyret/spotlight',
+ 'info_dict': {
+ 'id': '7098329',
+ 'title': 'Grynpyret (Spotlight)',
+ },
+ 'playlist_mincount': 1,
+ }]
+
+ _BASE_URL_MAP = {
+ 'all': 'stream/users/%s',
+ 'tracks': 'users/%s/tracks',
+ 'albums': 'users/%s/albums',
+ 'sets': 'users/%s/playlists',
+ 'reposts': 'stream/users/%s/reposts',
+ 'likes': 'users/%s/likes',
+ 'spotlight': 'users/%s/spotlight',
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ uploader = mobj.group('user')
+
+ user = self._download_json(
+ self._resolv_url(self._BASE_URL + uploader),
+ uploader, 'Downloading user info')
+
+ resource = mobj.group('rsrc') or 'all'
+
+ return self._extract_playlist(
+ self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'],
+ str_or_none(user.get('id')),
+ '%s (%s)' % (user['username'], resource.capitalize()))
+
+
+class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
+ IE_NAME = 'soundcloud:trackstation'
+ _TESTS = [{
+ 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
+ 'info_dict': {
+ 'id': '286017854',
+ 'title': 'Track station: your text',
+ },
+ 'playlist_mincount': 47,
+ }]
+
+ def _real_extract(self, url):
+ track_name = self._match_id(url)
+
+ track = self._download_json(self._resolv_url(url), track_name)
+ track_id = self._search_regex(
+ r'soundcloud:track-stations:(\d+)', track['id'], 'track id')
+
+ return self._extract_playlist(
+ self._API_V2_BASE + 'stations/%s/tracks' % track['id'],
+ track_id, 'Track station: %s' % track['title'])
+
+
+class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
+ _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
+ IE_NAME = 'soundcloud:playlist'
+ _TESTS = [{
+ 'url': 'https://api.soundcloud.com/playlists/4110309',
+ 'info_dict': {
+ 'id': '4110309',
+ 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
+ 'description': 're:.*?TILT Brass - Bowery Poetry Club',
+ },
+ 'playlist_count': 6,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ playlist_id = mobj.group('id')
+
+ query = {}
+ token = mobj.group('token')
+ if token:
+ query['secret_token'] = token
+
+ data = self._download_json(
+ self._API_V2_BASE + 'playlists/' + playlist_id,
+ playlist_id, 'Downloading playlist', query=query)
+
+ return self._extract_set(data, token)
+
+
+class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
+ IE_NAME = 'soundcloud:search'
+ IE_DESC = 'Soundcloud search'
+ _MAX_RESULTS = float('inf')
+ _TESTS = [{
+ 'url': 'scsearch15:post-avant jazzcore',
+ 'info_dict': {
+ 'title': 'post-avant jazzcore',
+ },
+ 'playlist_count': 15,
+ }]
+
+ _SEARCH_KEY = 'scsearch'
+ _MAX_RESULTS_PER_PAGE = 200
+ _DEFAULT_RESULTS_PER_PAGE = 50
+
+ def _get_collection(self, endpoint, collection_id, **query):
+ limit = min(
+ query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
+ self._MAX_RESULTS_PER_PAGE)
+ query.update({
+ 'limit': limit,
+ 'linked_partitioning': 1,
+ 'offset': 0,
+ })
+ next_url = update_url_query(self._API_V2_BASE + endpoint, query)
+
+ collected_results = 0
+
+ for i in itertools.count(1):
+ response = self._download_json(
+ next_url, collection_id, 'Downloading page {0}'.format(i),
+ 'Unable to download API page')
+
+ collection = response.get('collection', [])
+ if not collection:
+ break
+
+ collection = list(filter(bool, collection))
+ collected_results += len(collection)
+
+ for item in collection:
+ yield self.url_result(item['uri'], SoundcloudIE.ie_key())
+
+ if not collection or collected_results >= limit:
+ break
+
+ next_url = response.get('next_href')
+ if not next_url:
+ break
+
+ def _get_n_results(self, query, n):
+ tracks = self._get_collection('search/tracks', query, limit=n, q=query)
+ return self.playlist_result(tracks, playlist_title=query)
diff --git a/hypervideo_dl/extractor/soundgasm.py b/hypervideo_dl/extractor/soundgasm.py
new file mode 100644
index 0000000..3d78a9d
--- /dev/null
+++ b/hypervideo_dl/extractor/soundgasm.py
@@ -0,0 +1,77 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SoundgasmIE(InfoExtractor):
+ IE_NAME = 'soundgasm'
+ _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
+ _TEST = {
+ 'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
+ 'md5': '010082a2c802c5275bb00030743e75ad',
+ 'info_dict': {
+ 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
+ 'ext': 'm4a',
+ 'title': 'Piano sample',
+ 'description': 'Royalty Free Sample Music',
+ 'uploader': 'ytdl',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ audio_url = self._html_search_regex(
+ r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'audio URL', group='url')
+
+ title = self._search_regex(
+ r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)',
+ webpage, 'title', default=display_id)
+
+ description = self._html_search_regex(
+ (r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>',
+ r'(?s)<li>Description:\s(.*?)</li>'),
+ webpage, 'description', fatal=False)
+
+ audio_id = self._search_regex(
+ r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id)
+
+ return {
+ 'id': audio_id,
+ 'display_id': display_id,
+ 'url': audio_url,
+ 'vcodec': 'none',
+ 'title': title,
+ 'description': description,
+ 'uploader': mobj.group('user'),
+ }
+
+
+class SoundgasmProfileIE(InfoExtractor):
+ IE_NAME = 'soundgasm:profile'
+ _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$'
+ _TEST = {
+ 'url': 'http://soundgasm.net/u/ytdl',
+ 'info_dict': {
+ 'id': 'ytdl',
+ },
+ 'playlist_count': 1,
+ }
+
+ def _real_extract(self, url):
+ profile_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, profile_id)
+
+ entries = [
+ self.url_result(audio_url, 'Soundgasm')
+ for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)]
+
+ return self.playlist_result(entries, profile_id)
diff --git a/hypervideo_dl/extractor/southpark.py b/hypervideo_dl/extractor/southpark.py
new file mode 100644
index 0000000..0774da0
--- /dev/null
+++ b/hypervideo_dl/extractor/southpark.py
@@ -0,0 +1,127 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .mtv import MTVServicesInfoExtractor
+
+
+class SouthParkIE(MTVServicesInfoExtractor):
+ IE_NAME = 'southpark.cc.com'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
+
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+
+ _TESTS = [{
+ 'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
+ 'info_dict': {
+ 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'South Park|Bat Daded',
+ 'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
+ 'timestamp': 1112760000,
+ 'upload_date': '20050406',
+ },
+ }, {
+ 'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.southparkstudios.com/episodes/h4o269/south-park-stunning-and-brave-season-19-ep-1',
+ 'only_matching': True,
+ }]
+
+ def _get_feed_query(self, uri):
+ return {
+ 'accountOverride': 'intl.mtvi.com',
+ 'arcEp': 'shared.southpark.global',
+ 'ep': '90877963',
+ 'imageEp': 'shared.southpark.global',
+ 'mgid': uri,
+ }
+
+
+class SouthParkEsIE(SouthParkIE):
+ IE_NAME = 'southpark.cc.com:español'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))'
+ _LANG = 'es'
+
+ _TESTS = [{
+ 'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
+ 'info_dict': {
+ 'title': 'Cartman Consigue Una Sonda Anal',
+ 'description': 'Cartman Consigue Una Sonda Anal',
+ },
+ 'playlist_count': 4,
+ 'skip': 'Geo-restricted',
+ }]
+
+
+class SouthParkDeIE(SouthParkIE):
+ IE_NAME = 'southpark.de'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden|collections)/(?P<id>.+?)(\?|#|$))'
+ _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
+
+ _TESTS = [{
+ 'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
+ 'info_dict': {
+ 'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
+ 'ext': 'mp4',
+ 'title': 'South Park|The Government Won\'t Respect My Privacy',
+ 'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
+ 'timestamp': 1380160800,
+ 'upload_date': '20130926',
+ },
+ }, {
+ # non-ASCII characters in initial URL
+ 'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen',
+ 'info_dict': {
+ 'title': 'Hashtag „Aufwärmen“',
+ 'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
+ },
+ 'playlist_count': 3,
+ }, {
+ # non-ASCII characters in redirect URL
+ 'url': 'http://www.southpark.de/alle-episoden/s18e09',
+ 'info_dict': {
+ 'title': 'Hashtag „Aufwärmen“',
+ 'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'http://www.southpark.de/collections/2476/superhero-showdown/1',
+ 'only_matching': True,
+ }]
+
+
+class SouthParkNlIE(SouthParkIE):
+ IE_NAME = 'southpark.nl'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
+ _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/'
+
+ _TESTS = [{
+ 'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free',
+ 'info_dict': {
+ 'title': 'Freemium Isn\'t Free',
+ 'description': 'Stan is addicted to the new Terrance and Phillip mobile game.',
+ },
+ 'playlist_mincount': 3,
+ }]
+
+
+class SouthParkDkIE(SouthParkIE):
+ IE_NAME = 'southparkstudios.dk'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.(?:dk|nu)/(?:clips|full-episodes|collections)/(?P<id>.+?)(\?|#|$))'
+ _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/'
+
+ _TESTS = [{
+ 'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop',
+ 'info_dict': {
+ 'title': 'Grounded Vindaloop',
+ 'description': 'Butters is convinced he\'s living in a virtual reality.',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'http://www.southparkstudios.dk/collections/2476/superhero-showdown/1',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.southparkstudios.nu/collections/2476/superhero-showdown/1',
+ 'only_matching': True,
+ }]
diff --git a/hypervideo_dl/extractor/spankbang.py b/hypervideo_dl/extractor/spankbang.py
new file mode 100644
index 0000000..37cb8c8
--- /dev/null
+++ b/hypervideo_dl/extractor/spankbang.py
@@ -0,0 +1,198 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ merge_dicts,
+ parse_duration,
+ parse_resolution,
+ str_to_int,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class SpankBangIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:[^/]+\.)?spankbang\.com/
+ (?:
+ (?P<id>[\da-z]+)/(?:video|play|embed)\b|
+ [\da-z]+-(?P<id_2>[\da-z]+)/playlist/[^/?#&]+
+ )
+ '''
+ _TESTS = [{
+ 'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
+ 'md5': '1cc433e1d6aa14bc376535b8679302f7',
+ 'info_dict': {
+ 'id': '3vvn',
+ 'ext': 'mp4',
+ 'title': 'fantasy solo',
+ 'description': 'dillion harper masturbates on a bed',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'silly2587',
+ 'timestamp': 1422571989,
+ 'upload_date': '20150129',
+ 'age_limit': 18,
+ }
+ }, {
+ # 480p only
+ 'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
+ 'only_matching': True,
+ }, {
+ # no uploader
+ 'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2',
+ 'only_matching': True,
+ }, {
+ # mobile page
+ 'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
+ 'only_matching': True,
+ }, {
+ # 4k
+ 'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.spankbang.com/3vvn/play',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://spankbang.com/2y3td/embed/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://spankbang.com/2v7ik-7ecbgu/playlist/latina+booty',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('id_2')
+ webpage = self._download_webpage(
+ url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
+ video_id, headers={'Cookie': 'country=US'})
+
+ if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage):
+ raise ExtractorError(
+ 'Video %s is not available' % video_id, expected=True)
+
+ formats = []
+
+ def extract_format(format_id, format_url):
+ f_url = url_or_none(format_url)
+ if not f_url:
+ return
+ f = parse_resolution(format_id)
+ ext = determine_ext(f_url)
+ if format_id.startswith('m3u8') or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ f_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif format_id.startswith('mpd') or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ f_url, video_id, mpd_id='dash', fatal=False))
+ elif ext == 'mp4' or f.get('width') or f.get('height'):
+ f.update({
+ 'url': f_url,
+ 'format_id': format_id,
+ })
+ formats.append(f)
+
+ STREAM_URL_PREFIX = 'stream_url_'
+
+ for mobj in re.finditer(
+ r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2'
+ % STREAM_URL_PREFIX, webpage):
+ extract_format(*mobj.group('id', 'url'))
+
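+ # Fall back to the stream API when no inline stream_url_* variables
+ # were found in the page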
+ if not formats:
+ stream_key = self._search_regex(
+ r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'stream key', group='value')
+
+ stream = self._download_json(
+ 'https://spankbang.com/api/videos/stream', video_id,
+ 'Downloading stream JSON', data=urlencode_postdata({
+ 'id': stream_key,
+ 'data': 0,
+ }), headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ })
+
+ for format_id, format_url in stream.items():
+ if format_url and isinstance(format_url, list):
+ format_url = format_url[0]
+ extract_format(format_id, format_url)
+
+ self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ title = self._html_search_regex(
+ r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title', default=None)
+ description = self._search_regex(
+ r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
+ webpage, 'description', default=None)
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+ uploader = self._html_search_regex(
+ (r'(?s)<li[^>]+class=["\']profile[^>]+>(.+?)</a>',
+ r'class="user"[^>]*><img[^>]+>([^<]+)'),
+ webpage, 'uploader', default=None)
+ duration = parse_duration(self._search_regex(
+ r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
+ webpage, 'duration', default=None))
+ view_count = str_to_int(self._search_regex(
+ r'([\d,.]+)\s+plays', webpage, 'view count', default=None))
+
+ age_limit = self._rta_search(webpage)
+
+ return merge_dicts({
+ 'id': video_id,
+ 'title': title or video_id,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ 'age_limit': age_limit,
+ }, info)
+
+
+class SpankBangPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/(?P<display_id>[^/]+)'
+ _TEST = {
+ 'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
+ 'info_dict': {
+ 'id': 'ug0k',
+ 'title': 'Big Ass Titties',
+ },
+ 'playlist_mincount': 40,
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ playlist_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(
+ url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
+
+ entries = [self.url_result(
+ urljoin(url, mobj.group('path')),
+ ie=SpankBangIE.ie_key(), video_id=mobj.group('id'))
+ for mobj in re.finditer(
+ r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/%s(?:(?!\1).)*)\1'
+ % re.escape(display_id), webpage)]
+
+ title = self._html_search_regex(
+ r'<h1>([^<]+)\s+playlist\s*<', webpage, 'playlist title',
+ fatal=False)
+
+ return self.playlist_result(entries, playlist_id, title)
diff --git a/hypervideo_dl/extractor/spankwire.py b/hypervideo_dl/extractor/spankwire.py
new file mode 100644
index 0000000..35ab9ec
--- /dev/null
+++ b/hypervideo_dl/extractor/spankwire.py
@@ -0,0 +1,182 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ merge_dicts,
+ str_or_none,
+ str_to_int,
+ url_or_none,
+)
+
+
+class SpankwireIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?spankwire\.com/
+ (?:
+ [^/]+/video|
+ EmbedPlayer\.aspx/?\?.*?\bArticleId=
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ # download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
+ 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
+ 'md5': '5aa0e4feef20aad82cbcae3aed7ab7cd',
+ 'info_dict': {
+ 'id': '103545',
+ 'ext': 'mp4',
+ 'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
+ 'description': 'Crazy Bitch X rated music video.',
+ 'duration': 222,
+ 'uploader': 'oreusz',
+ 'uploader_id': '124697',
+ 'timestamp': 1178587885,
+ 'upload_date': '20070508',
+ 'average_rating': float,
+ 'view_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ 'categories': list,
+ 'tags': list,
+ },
+ }, {
+ # download URL pattern: */mp4_<format_id>_<video_id>.mp4
+ 'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/',
+ 'md5': '09b3c20833308b736ae8902db2f8d7e6',
+ 'info_dict': {
+ 'id': '1921551',
+ 'ext': 'mp4',
+ 'title': 'Titcums Compiloation I',
+ 'description': 'cum on tits',
+ 'uploader': 'dannyh78999',
+ 'uploader_id': '3056053',
+ 'upload_date': '20150822',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'proxy': '127.0.0.1:8118'
+ },
+ 'skip': 'removed',
+ }, {
+ 'url': 'https://www.spankwire.com/EmbedPlayer.aspx/?ArticleId=156156&autostart=true',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://www.spankwire.com/api/video/%s.json' % video_id, video_id)
+
+ title = video['title']
+
+ formats = []
+ videos = video.get('videos')
+ if isinstance(videos, dict):
+ for format_id, format_url in videos.items():
+ video_url = url_or_none(format_url)
+ if not video_url:
+ continue
+ height = int_or_none(self._search_regex(
+ r'(\d+)[pP]', format_id, 'height', default=None))
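+ # Download URLs look like */<height>P_<tbr>K_<video_id>.mp4;
+ # recover height/tbr from the path when present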
+ m = re.search(
+ r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', video_url)
+ if m:
+ tbr = int(m.group('tbr'))
+ height = height or int(m.group('height'))
+ else:
+ tbr = None
+ formats.append({
+ 'url': video_url,
+ 'format_id': '%dp' % height if height else format_id,
+ 'height': height,
+ 'tbr': tbr,
+ })
+ m3u8_url = url_or_none(video.get('HLS'))
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats, ('height', 'tbr', 'width', 'format_id'))
+
+ view_count = str_to_int(video.get('viewed'))
+
+ thumbnails = []
+ for preference, t in enumerate(('', '2x')):
+ thumbnail_url = url_or_none(video.get('poster%s' % t))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'preference': preference,
+ })
+
+ def extract_names(key):
+ entries_list = video.get(key)
+ if not isinstance(entries_list, list):
+ return
+ entries = []
+ for entry in entries_list:
+ name = str_or_none(entry.get('name'))
+ if name:
+ entries.append(name)
+ return entries
+
+ categories = extract_names('categories')
+ tags = extract_names('tags')
+
+ uploader = None
+ info = {}
+
+ webpage = self._download_webpage(
+ 'https://www.spankwire.com/_/video%s/' % video_id, video_id,
+ fatal=False)
+ if webpage:
+ info = self._search_json_ld(webpage, video_id, default={})
+ thumbnail_url = None
+ if 'thumbnail' in info:
+ thumbnail_url = url_or_none(info['thumbnail'])
+ del info['thumbnail']
+ if not thumbnail_url:
+ thumbnail_url = self._og_search_thumbnail(webpage)
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'preference': 10,
+ })
+ uploader = self._html_search_regex(
+ r'(?s)by\s*<a[^>]+\bclass=["\']uploaded__by[^>]*>(.+?)</a>',
+ webpage, 'uploader', fatal=False)
+ if not view_count:
+ view_count = str_to_int(self._search_regex(
+ r'data-views=["\']([\d,.]+)', webpage, 'view count',
+ fatal=False))
+
+ return merge_dicts({
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'duration': int_or_none(video.get('duration')),
+ 'thumbnails': thumbnails,
+ 'uploader': uploader,
+ 'uploader_id': str_or_none(video.get('userId')),
+ 'timestamp': int_or_none(video.get('time_approved_on')),
+ 'average_rating': float_or_none(video.get('rating')),
+ 'view_count': view_count,
+ 'comment_count': int_or_none(video.get('comments')),
+ 'age_limit': 18,
+ 'categories': categories,
+ 'tags': tags,
+ 'formats': formats,
+ }, info)
diff --git a/hypervideo_dl/extractor/spiegel.py b/hypervideo_dl/extractor/spiegel.py
new file mode 100644
index 0000000..2da32b9
--- /dev/null
+++ b/hypervideo_dl/extractor/spiegel.py
@@ -0,0 +1,54 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+
+
+class SpiegelIE(InfoExtractor):
+ _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
+ _VALID_URL = r'https?://(?:www\.)?(?:spiegel|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+|%s)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$' % _UUID_RE
+ _TESTS = [{
+ 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
+ 'md5': '50c7948883ec85a3e431a0a44b7ad1d6',
+ 'info_dict': {
+ 'id': 'II0BUyxY',
+ 'display_id': '1259285',
+ 'ext': 'mp4',
+ 'title': 'Vulkan Tungurahua in Ecuador ist wieder aktiv - DER SPIEGEL - Wissenschaft',
+ 'description': 'md5:8029d8310232196eb235d27575a8b9f4',
+ 'duration': 48.0,
+ 'upload_date': '20130311',
+ 'timestamp': 1362997920,
+ },
+ }, {
+ 'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.spiegel.de/panorama/urteile-im-goldmuenzenprozess-haftstrafen-fuer-clanmitglieder-a-aae8df48-43c1-4c61-867d-23f0a2d254b7',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ media_id = self._html_search_regex(
+ r'(&#34;|["\'])mediaId\1\s*:\s*(&#34;|["\'])(?P<id>(?:(?!\2).)+)\2',
+ webpage, 'media id', group='id')
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'display_id': video_id,
+ 'url': 'jwplatform:%s' % media_id,
+ 'title': self._og_search_title(webpage, default=None),
+ 'ie_key': JWPlatformIE.ie_key(),
+ }
diff --git a/hypervideo_dl/extractor/spike.py b/hypervideo_dl/extractor/spike.py
new file mode 100644
index 0000000..5805f3d
--- /dev/null
+++ b/hypervideo_dl/extractor/spike.py
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+from .mtv import MTVServicesInfoExtractor
+
+
+class BellatorIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bellator\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
+ _TESTS = [{
+ 'url': 'http://www.bellator.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg',
+ 'info_dict': {
+ 'title': 'Michael Page vs. Evangelista Cyborg',
+ 'description': 'md5:0d917fc00ffd72dd92814963fc6cbb05',
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'http://www.bellator.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page',
+ 'only_matching': True,
+ }]
+
+ _FEED_URL = 'http://www.bellator.com/feeds/mrss/'
+ _GEO_COUNTRIES = ['US']
+
+
+class ParamountNetworkIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
+ _TESTS = [{
+ 'url': 'http://www.paramountnetwork.com/episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-13',
+ 'info_dict': {
+ 'id': '37ace3a8-1df6-48be-85b8-38df8229e241',
+ 'ext': 'mp4',
+ 'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. Jim Rash|Act 1',
+ 'description': 'md5:a739ca8f978a7802f67f8016d27ce114',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+ _GEO_COUNTRIES = ['US']
+
+ def _get_feed_query(self, uri):
+ return {
+ 'arcEp': 'paramountnetwork.com',
+ 'imageEp': 'paramountnetwork.com',
+ 'mgid': uri,
+ }
diff --git a/hypervideo_dl/extractor/sport5.py b/hypervideo_dl/extractor/sport5.py
new file mode 100644
index 0000000..a417b5a
--- /dev/null
+++ b/hypervideo_dl/extractor/sport5.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class Sport5IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|vod)\.)?sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1',
+ 'info_dict': {
+ 'id': 's5-Y59xx1-GUh2',
+ 'ext': 'mp4',
+ 'title': 'ולנסיה-קורדובה 0:3',
+ 'description': 'אלקאסר, גאייה ופגולי סידרו לקבוצה של נונו ניצחון על קורדובה ואת המקום הראשון בליגה',
+ 'duration': 228,
+ 'categories': list,
+ },
+ 'skip': 'Blocked outside of Israel',
+ }, {
+ 'url': 'http://www.sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE',
+ 'info_dict': {
+ 'id': 's5-SiXxx1-hKh2',
+ 'ext': 'mp4',
+ 'title': 'GOALS_CELTIC_270914.mp4',
+ 'description': '',
+ 'duration': 87,
+ 'categories': list,
+ },
+ 'skip': 'Blocked outside of Israel',
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ media_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, media_id)
+
+ video_id = self._html_search_regex(r'clipId=([\w-]+)', webpage, 'video id')
+
+ metadata = self._download_xml(
+ 'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % video_id,
+ video_id)
+
+ error = metadata.find('./Error')
+ if error is not None:
+ raise ExtractorError(
+ '%s returned error: %s - %s' % (
+ self.IE_NAME,
+ error.find('./Name').text,
+ error.find('./Description').text),
+ expected=True)
+
+ title = metadata.find('./Title').text
+ description = metadata.find('./Description').text
+ duration = int(metadata.find('./Duration').text)
+
+ posters_el = metadata.find('./PosterLinks')
+ thumbnails = [{
+ 'url': thumbnail.text,
+ 'width': int(thumbnail.get('width')),
+ 'height': int(thumbnail.get('height')),
+ } for thumbnail in posters_el.findall('./PosterIMG')] if posters_el is not None else []
+
+ categories_el = metadata.find('./Categories')
+ categories = [
+ cat.get('name') for cat in categories_el.findall('./Category')
+ ] if categories_el is not None else []
+
+ formats = [{
+ 'url': fmt.text,
+ 'ext': 'mp4',
+ 'vbr': int(fmt.get('bitrate')),
+ 'width': int(fmt.get('width')),
+ 'height': int(fmt.get('height')),
+ } for fmt in metadata.findall('./PlaybackLinks/FileURL')]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ 'categories': categories,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/sportbox.py b/hypervideo_dl/extractor/sportbox.py
new file mode 100644
index 0000000..b9017fd
--- /dev/null
+++ b/hypervideo_dl/extractor/sportbox.py
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ js_to_json,
+ merge_dicts,
+)
+
+
+class SportBoxIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
+ 'info_dict': {
+ 'id': '109158',
+ 'ext': 'mp4',
+ 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
+ 'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 292,
+ 'view_count': int,
+ 'timestamp': 1426237001,
+ 'upload_date': '20150313',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://news.sportbox.ru/vdl/player/media/193095',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://news.sportbox.ru/vdl/player/media/109158',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://matchtv.ru/vdl/player/media/109158',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ sources = self._parse_json(
+ self._search_regex(
+ r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n',
+ webpage, 'sources'),
+ video_id, transform_source=js_to_json)
+
+ formats = []
+ for source in sources:
+ src = source.get('src')
+ if not src:
+ continue
+ if determine_ext(src) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ })
+ self._sort_formats(formats)
+
+ player = self._parse_json(
+ self._search_regex(
+ r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage,
+ 'player options', default='{}'),
+ video_id, transform_source=js_to_json)
+ media_id = player['mediaId']
+
+ info = self._search_json_ld(webpage, media_id, default={})
+
+ view_count = int_or_none(self._search_regex(
+ r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None))
+
+ return merge_dicts(info, {
+ 'id': media_id,
+ 'title': self._og_search_title(webpage, default=None) or media_id,
+ 'thumbnail': player.get('poster'),
+ 'duration': int_or_none(player.get('duration')),
+ 'view_count': view_count,
+ 'formats': formats,
+ })
diff --git a/hypervideo_dl/extractor/sportdeutschland.py b/hypervideo_dl/extractor/sportdeutschland.py
new file mode 100644
index 0000000..3e497a9
--- /dev/null
+++ b/hypervideo_dl/extractor/sportdeutschland.py
@@ -0,0 +1,105 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ clean_html,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+)
+
+
+class SportDeutschlandIE(InfoExtractor):
+ _VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
+ _TESTS = [{
+ 'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
+ 'info_dict': {
+ 'id': '5318cac0275701382770543d7edaf0a0',
+ 'ext': 'mp4',
+ 'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1',
+ 'duration': 16106.36,
+ },
+ 'params': {
+ 'noplaylist': True,
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
+ 'info_dict': {
+ 'id': 'c6e2fdd01f63013854c47054d2ab776f',
+ 'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals',
+ 'description': 'md5:5263ff4c31c04bb780c9f91130b48530',
+ 'duration': 31397,
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ data = self._download_json(
+ 'https://backend.sportdeutschland.tv/api/permalinks/' + display_id,
+ display_id, query={'access_token': 'true'})
+ asset = data['asset']
+ title = (asset.get('title') or asset['label']).strip()
+ asset_id = asset.get('id') or asset.get('uuid')
+ info = {
+ 'id': asset_id,
+ 'title': title,
+ 'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'),
+ 'duration': int_or_none(asset.get('seconds')),
+ }
+ videos = asset.get('videos') or []
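+ # multi-part assets: a ?playlistId=N query picks a single part when
+ # --no-playlist is given; otherwise all parts are downloaded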
+ if len(videos) > 1:
+ playlist_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('playlistId', [None])[0]
+ if playlist_id:
+ if self._downloader.params.get('noplaylist'):
+ videos = [videos[int(playlist_id)]]
+ self.to_screen('Downloading just a single video because of --no-playlist')
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % asset_id)
+
+ def entries():
+ for i, video in enumerate(videos, 1):
+ video_id = video.get('uuid')
+ video_url = video.get('url')
+ if not (video_id and video_url):
+ continue
+ formats = self._extract_m3u8_formats(
+ video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
+ if not formats:
+ continue
+ yield {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title + ' - ' + (video.get('label') or 'Teil %d' % i),
+ 'duration': float_or_none(video.get('duration')),
+ }
+ info.update({
+ '_type': 'multi_video',
+ 'entries': entries(),
+ })
+ else:
+ formats = self._extract_m3u8_formats(
+ videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4')
+ section_title = strip_or_none(try_get(data, lambda x: x['section']['title']))
+ info.update({
+ 'formats': formats,
+ 'display_id': asset.get('permalink'),
+ 'thumbnail': try_get(asset, lambda x: x['images'][0]),
+ 'categories': [section_title] if section_title else None,
+ 'view_count': int_or_none(asset.get('views')),
+ 'is_live': asset.get('is_live') is True,
+ 'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')),
+ })
+ return info
diff --git a/hypervideo_dl/extractor/spotify.py b/hypervideo_dl/extractor/spotify.py
new file mode 100644
index 0000000..826f98c
--- /dev/null
+++ b/hypervideo_dl/extractor/spotify.py
@@ -0,0 +1,156 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_podcast_url,
+ float_or_none,
+ int_or_none,
+ strip_or_none,
+ try_get,
+ unified_strdate,
+)
+
+
+class SpotifyBaseIE(InfoExtractor):
+ _ACCESS_TOKEN = None
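+ # sha256 hashes identifying Spotify's persisted GraphQL queries; requests
+ # reference a query by hash instead of sending the full query text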
+ _OPERATION_HASHES = {
+ 'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
+ 'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
+ 'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
+ }
+ _VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)'
+
+ def _real_initialize(self):
+ self._ACCESS_TOKEN = self._download_json(
+ 'https://open.spotify.com/get_access_token', None)['accessToken']
+
+ def _call_api(self, operation, video_id, variables):
+ return self._download_json(
+ 'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={
+ 'operationName': 'query' + operation,
+ 'variables': json.dumps(variables),
+ 'extensions': json.dumps({
+ 'persistedQuery': {
+ 'sha256Hash': self._OPERATION_HASHES[operation],
+ },
+ })
+ }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN})['data']
+
+ def _extract_episode(self, episode, series):
+ episode_id = episode['id']
+ title = episode['name'].strip()
+
+ formats = []
+ audio_preview = episode.get('audioPreview') or {}
+ audio_preview_url = audio_preview.get('url')
+ if audio_preview_url:
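+ # the p.scdn.co preview host needs auth; the anon-podcast host appears to
+ # serve the same file openly, hence the replacement below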
+ f = {
+ 'url': audio_preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
+ 'vcodec': 'none',
+ }
+ audio_preview_format = audio_preview.get('format')
+ if audio_preview_format:
+ f['format_id'] = audio_preview_format
+ mobj = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', audio_preview_format)
+ if mobj:
+ f.update({
+ 'abr': int(mobj.group(2)),
+ 'ext': mobj.group(1).lower(),
+ })
+ formats.append(f)
+
+ for item in (try_get(episode, lambda x: x['audio']['items']) or []):
+ item_url = item.get('url')
+ if not (item_url and item.get('externallyHosted')):
+ continue
+ formats.append({
+ 'url': clean_podcast_url(item_url),
+ 'vcodec': 'none',
+ })
+
+ thumbnails = []
+ for source in (try_get(episode, lambda x: x['coverArt']['sources']) or []):
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ thumbnails.append({
+ 'url': source_url,
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+
+ return {
+ 'id': episode_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': strip_or_none(episode.get('description')),
+ 'duration': float_or_none(try_get(
+ episode, lambda x: x['duration']['totalMilliseconds']), 1000),
+ 'release_date': unified_strdate(try_get(
+ episode, lambda x: x['releaseDate']['isoString'])),
+ 'series': series,
+ }
+
+
+class SpotifyIE(SpotifyBaseIE):
+ IE_NAME = 'spotify'
+ _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
+ _TEST = {
+ 'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
+ 'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
+ 'info_dict': {
+ 'id': '4Z7GAJ50bgctf6uclHlWKo',
+ 'ext': 'mp3',
+ 'title': 'From the archive: Why time management is ruining our lives',
+ 'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
+ 'duration': 2083.605,
+ 'release_date': '20201217',
+ 'series': "The Guardian's Audio Long Reads",
+ }
+ }
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ episode = self._call_api('Episode', episode_id, {
+ 'uri': 'spotify:episode:' + episode_id
+ })['episode']
+ return self._extract_episode(
+ episode, try_get(episode, lambda x: x['podcast']['name']))
+
+
+class SpotifyShowIE(SpotifyBaseIE):
+ IE_NAME = 'spotify:show'
+ _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
+ _TEST = {
+ 'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
+ 'info_dict': {
+ 'id': '4PM9Ke6l66IRNpottHKV9M',
+ 'title': 'The Story from the Guardian',
+ 'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
+ },
+ 'playlist_mincount': 36,
+ }
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ podcast = self._call_api('ShowEpisodes', show_id, {
+ 'limit': 1000000000,
+ 'offset': 0,
+ 'uri': 'spotify:show:' + show_id,
+ })['podcast']
+ podcast_name = podcast.get('name')
+
+ entries = []
+ for item in (try_get(podcast, lambda x: x['episodes']['items']) or []):
+ episode = item.get('episode')
+ if not episode:
+ continue
+ entries.append(self._extract_episode(episode, podcast_name))
+
+ return self.playlist_result(
+ entries, show_id, podcast_name, podcast.get('description'))
diff --git a/hypervideo_dl/extractor/spreaker.py b/hypervideo_dl/extractor/spreaker.py
new file mode 100644
index 0000000..6c7e40a
--- /dev/null
+++ b/hypervideo_dl/extractor/spreaker.py
@@ -0,0 +1,176 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+def _extract_episode(data, episode_id=None):
+ title = data['title']
+ download_url = data['download_url']
+
+ series = try_get(data, lambda x: x['show']['title'], compat_str)
+ uploader = try_get(data, lambda x: x['author']['fullname'], compat_str)
+
+ thumbnails = []
+ for image in ('image_original', 'image_medium', 'image'):
+ image_url = url_or_none(data.get('%s_url' % image))
+ if image_url:
+ thumbnails.append({'url': image_url})
+
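+ # counters may live at the top level (e.g. 'plays_count') or nested under
+ # 'stats' (e.g. stats['plays'])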
+ def stats(key):
+ return int_or_none(try_get(
+ data,
+ (lambda x: x['%ss_count' % key],
+ lambda x: x['stats']['%ss' % key])))
+
+ def duration(key):
+ return float_or_none(data.get(key), scale=1000)
+
+ return {
+ 'id': compat_str(episode_id or data['episode_id']),
+ 'url': download_url,
+ 'display_id': data.get('permalink'),
+ 'title': title,
+ 'description': data.get('description'),
+ 'timestamp': unified_timestamp(data.get('published_at')),
+ 'uploader': uploader,
+ 'uploader_id': str_or_none(data.get('author_id')),
+ 'creator': uploader,
+ 'duration': duration('duration') or duration('length'),
+ 'view_count': stats('play'),
+ 'like_count': stats('like'),
+ 'comment_count': stats('message'),
+ 'format': 'MPEG Layer 3',
+ 'format_id': 'mp3',
+ 'container': 'mp3',
+ 'ext': 'mp3',
+ 'thumbnails': thumbnails,
+ 'series': series,
+ 'extractor_key': SpreakerIE.ie_key(),
+ }
+
+
+class SpreakerIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ api\.spreaker\.com/
+ (?:
+ (?:download/)?episode|
+ v2/episodes
+ )/
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://api.spreaker.com/episode/12534508',
+ 'info_dict': {
+ 'id': '12534508',
+ 'display_id': 'swm-ep15-how-to-market-your-music-part-2',
+ 'ext': 'mp3',
+ 'title': 'EP:15 | Music Marketing (Likes) - Part 2',
+ 'description': 'md5:0588c43e27be46423e183076fa071177',
+ 'timestamp': 1502250336,
+ 'upload_date': '20170809',
+ 'uploader': 'SWM',
+ 'uploader_id': '9780658',
+ 'duration': 1063.42,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'series': 'Success With Music (SWM)',
+ },
+ }, {
+ 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ data = self._download_json(
+ 'https://api.spreaker.com/v2/episodes/%s' % episode_id,
+ episode_id)['response']['episode']
+ return _extract_episode(data, episode_id)
+
+
+class SpreakerPageIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ episode_id = self._search_regex(
+ (r'data-episode_id=["\'](?P<id>\d+)',
+ r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id')
+ return self.url_result(
+ 'https://api.spreaker.com/episode/%s' % episode_id,
+ ie=SpreakerIE.ie_key(), video_id=episode_id)
+
+
+class SpreakerShowIE(InfoExtractor):
+ _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://api.spreaker.com/show/4652058',
+ 'info_dict': {
+ 'id': '4652058',
+ },
+ 'playlist_mincount': 118,
+ }]
+
+ def _entries(self, show_id):
+ for page_num in itertools.count(1):
+ episodes = self._download_json(
+ 'https://api.spreaker.com/show/%s/episodes' % show_id,
+ show_id, note='Downloading JSON page %d' % page_num, query={
+ 'page': page_num,
+ 'max_per_page': 100,
+ })
+ pager = try_get(episodes, lambda x: x['response']['pager'], dict)
+ if not pager:
+ break
+ results = pager.get('results')
+ if not results or not isinstance(results, list):
+ break
+ for result in results:
+ if not isinstance(result, dict):
+ continue
+ yield _extract_episode(result)
+ if page_num == pager.get('last_page'):
+ break
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ return self.playlist_result(self._entries(show_id), playlist_id=show_id)
+
+
+class SpreakerShowPageIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.spreaker.com/show/success-with-music',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ show_id = self._search_regex(
+ r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
+ return self.url_result(
+ 'https://api.spreaker.com/show/%s' % show_id,
+ ie=SpreakerShowIE.ie_key(), video_id=show_id)
diff --git a/hypervideo_dl/extractor/springboardplatform.py b/hypervideo_dl/extractor/springboardplatform.py
new file mode 100644
index 0000000..07d99b5
--- /dev/null
+++ b/hypervideo_dl/extractor/springboardplatform.py
@@ -0,0 +1,125 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ xpath_attr,
+ xpath_text,
+ xpath_element,
+ unescapeHTML,
+ unified_timestamp,
+)
+
+
+class SpringboardPlatformIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ cms\.springboardplatform\.com/
+ (?:
+ (?:previews|embed_iframe)/(?P<index>\d+)/video/(?P<id>\d+)|
+ xml_feeds_advanced/index/(?P<index_2>\d+)/rss3/(?P<id_2>\d+)
+ )
+ '''
+ _TESTS = [{
+ 'url': 'http://cms.springboardplatform.com/previews/159/video/981017/0/0/1',
+ 'md5': '5c3cb7b5c55740d482561099e920f192',
+ 'info_dict': {
+ 'id': '981017',
+ 'ext': 'mp4',
+ 'title': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
+ 'description': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1409132328,
+ 'upload_date': '20140827',
+ 'duration': 193,
+ },
+ }, {
+ 'url': 'http://cms.springboardplatform.com/embed_iframe/159/video/981017/rab007/rapbasement.com/1/1',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cms.springboardplatform.com/embed_iframe/20/video/1731611/ki055/kidzworld.com/10',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cms.springboardplatform.com/xml_feeds_advanced/index/159/rss3/981017/0/0/1/',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('id_2')
+ index = mobj.group('index') or mobj.group('index_2')
+
+ video = self._download_xml(
+ 'http://cms.springboardplatform.com/xml_feeds_advanced/index/%s/rss3/%s'
+ % (index, video_id), video_id)
+
+ item = xpath_element(video, './/item', 'item', fatal=True)
+
+ content = xpath_element(
+ item, './{http://search.yahoo.com/mrss/}content', 'content',
+ fatal=True)
+ title = unescapeHTML(xpath_text(item, './title', 'title', fatal=True))
+
+ video_url = content.attrib['url']
+
+ if 'error_video.mp4' in video_url:
+ raise ExtractorError(
+ 'Video %s no longer exists' % video_id, expected=True)
+
+ duration = int_or_none(content.get('duration'))
+ tbr = int_or_none(content.get('bitrate'))
+ filesize = int_or_none(content.get('fileSize'))
+ width = int_or_none(content.get('width'))
+ height = int_or_none(content.get('height'))
+
+ description = unescapeHTML(xpath_text(
+ item, './description', 'description'))
+ thumbnail = xpath_attr(
+ item, './{http://search.yahoo.com/mrss/}thumbnail', 'url',
+ 'thumbnail')
+
+ timestamp = unified_timestamp(xpath_text(
+ item, './{http://cms.springboardplatform.com/namespaces.html}created',
+ 'timestamp'))
+
+ formats = [{
+ 'url': video_url,
+ 'format_id': 'http',
+ 'tbr': tbr,
+ 'filesize': filesize,
+ 'width': width,
+ 'height': height,
+ }]
+
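+ # the same asset is also available via HLS from the hls. subdomain, so
+ # derive its manifest URL from the progressive download URL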
+ m3u8_format = formats[0].copy()
+ m3u8_format.update({
+ 'url': re.sub(r'(https?://)cdn\.', r'\1hls.', video_url) + '.m3u8',
+ 'ext': 'mp4',
+ 'format_id': 'hls',
+ 'protocol': 'm3u8_native',
+ })
+ formats.append(m3u8_format)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/sprout.py b/hypervideo_dl/extractor/sprout.py
new file mode 100644
index 0000000..e243732
--- /dev/null
+++ b/hypervideo_dl/extractor/sprout.py
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .adobepass import AdobePassIE
+from ..utils import (
+ int_or_none,
+ smuggle_url,
+ update_url_query,
+)
+
+
+class SproutIE(AdobePassIE):
+ _VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race',
+ 'info_dict': {
+ 'id': 'bm0foJFaTKqb',
+ 'ext': 'mp4',
+ 'title': 'Robot Bike Race',
+ 'description': 'md5:436b1d97117cc437f54c383f4debc66d',
+ 'timestamp': 1606148940,
+ 'upload_date': '20201123',
+ 'uploader': 'NBCU-MPAT',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.universalkids.com/watch/robot-bike-race',
+ 'only_matching': True,
+ }]
+ _GEO_COUNTRIES = ['US']
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ mpx_metadata = self._download_json(
+ # http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/
+ 'https://www.universalkids.com/_api/videos/' + display_id,
+ display_id)['mpxMetadata']
+ media_pid = mpx_metadata['mediaPid']
+ theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid
+ query = {
+ 'mbr': 'true',
+ 'manifest': 'm3u',
+ }
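+ # entitled content requires an Adobe Pass (MVPD) authorization token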
+ if mpx_metadata.get('entitlement') == 'auth':
+ query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout')
+ theplatform_url = smuggle_url(
+ update_url_query(theplatform_url, query), {
+ 'force_smil_url': True,
+ 'geo_countries': self._GEO_COUNTRIES,
+ })
+ return {
+ '_type': 'url_transparent',
+ 'id': media_pid,
+ 'url': theplatform_url,
+ 'series': mpx_metadata.get('seriesName'),
+ 'season_number': int_or_none(mpx_metadata.get('seasonNumber')),
+ 'episode_number': int_or_none(mpx_metadata.get('episodeNumber')),
+ 'ie_key': 'ThePlatform',
+ }
diff --git a/hypervideo_dl/extractor/srgssr.py b/hypervideo_dl/extractor/srgssr.py
new file mode 100644
index 0000000..ac018e7
--- /dev/null
+++ b/hypervideo_dl/extractor/srgssr.py
@@ -0,0 +1,252 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ qualities,
+ try_get,
+)
+
+
+class SRGSSRIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|
+ srgssr
+ ):
+ (?P<bu>
+ srf|rts|rsi|rtr|swi
+ ):(?:[^:]+:)?
+ (?P<type>
+ video|audio
+ ):
+ (?P<id>
+ [0-9a-f\-]{36}|\d+
+ )
+ '''
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['CH']
+
+ _ERRORS = {
+ 'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
+ 'AGERATING18': 'To protect children under the age of 18, this video is only available between 11 p.m. and 5 a.m.',
+ # 'ENDDATE': 'For legal reasons, this video was only available for a specified period of time.',
+ 'GEOBLOCK': 'For legal reasons, this video is only available in Switzerland.',
+ 'LEGAL': 'The video cannot be transmitted for legal reasons.',
+ 'STARTDATE': 'This video is not yet available. Please try again later.',
+ }
+ _DEFAULT_LANGUAGE_CODES = {
+ 'srf': 'de',
+ 'rts': 'fr',
+ 'rsi': 'it',
+ 'rtr': 'rm',
+ 'swi': 'en',
+ }
+
+ def _get_tokenized_src(self, url, video_id, format_id):
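+ # Akamai-protected streams need auth params from the token service
+ # appended to the stream URL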
+ token = self._download_json(
+ 'http://tp.srgssr.ch/akahd/token?acl=*',
+ video_id, 'Downloading %s token' % format_id, fatal=False) or {}
+ auth_params = try_get(token, lambda x: x['token']['authparams'])
+ if auth_params:
+ url += ('?' if '?' not in url else '&') + auth_params
+ return url
+
+ def _get_media_data(self, bu, media_type, media_id):
+ query = {'onlyChapters': True} if media_type == 'video' else {}
+ full_media_data = self._download_json(
+ 'https://il.srgssr.ch/integrationlayer/2.0/%s/mediaComposition/%s/%s.json'
+ % (bu, media_type, media_id),
+ media_id, query=query)['chapterList']
+ try:
+ media_data = next(
+ x for x in full_media_data if x.get('id') == media_id)
+ except StopIteration:
+ raise ExtractorError('No media information found')
+
+ block_reason = media_data.get('blockReason')
+ if block_reason and block_reason in self._ERRORS:
+ message = self._ERRORS[block_reason]
+ if block_reason == 'GEOBLOCK':
+ self.raise_geo_restricted(
+ msg=message, countries=self._GEO_COUNTRIES)
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, message), expected=True)
+
+ return media_data
+
+ def _real_extract(self, url):
+ bu, media_type, media_id = re.match(self._VALID_URL, url).groups()
+ media_data = self._get_media_data(bu, media_type, media_id)
+ title = media_data['title']
+
+ formats = []
+ q = qualities(['SD', 'HD'])
+ for source in (media_data.get('resourceList') or []):
+ format_url = source.get('url')
+ if not format_url:
+ continue
+ protocol = source.get('protocol')
+ quality = source.get('quality')
+ format_id = []
+ for e in (protocol, source.get('encoding'), quality):
+ if e:
+ format_id.append(e)
+ format_id = '-'.join(format_id)
+
+ if protocol in ('HDS', 'HLS'):
+ if source.get('tokenType') == 'AKAMAI':
+ format_url = self._get_tokenized_src(
+ format_url, media_id, format_id)
+ formats.extend(self._extract_akamai_formats(
+ format_url, media_id))
+ elif protocol == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, media_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False))
+ elif protocol in ('HTTP', 'HTTPS'):
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'quality': q(quality),
+ })
+
+ # This is needed because for audio media the podcast URL is usually
+ # included even when the entry is only an audio segment and not the
+ # whole episode.
+ if int_or_none(media_data.get('position')) == 0:
+ for p in ('S', 'H'):
+ podcast_url = media_data.get('podcast%sdUrl' % p)
+ if not podcast_url:
+ continue
+ quality = p + 'D'
+ formats.append({
+ 'format_id': 'PODCAST-' + quality,
+ 'url': podcast_url,
+ 'quality': q(quality),
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ if media_type == 'video':
+ for sub in (media_data.get('subtitleList') or []):
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ lang = sub.get('locale') or self._DEFAULT_LANGUAGE_CODES[bu]
+ subtitles.setdefault(lang, []).append({
+ 'url': sub_url,
+ })
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'description': media_data.get('description'),
+ 'timestamp': parse_iso8601(media_data.get('date')),
+ 'thumbnail': media_data.get('imageUrl'),
+ 'duration': float_or_none(media_data.get('duration'), 1000),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
+
+
+class SRGSSRPlayIE(InfoExtractor):
+ IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|play)\.)?
+ (?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/
+ (?:
+ [^/]+/(?P<type>video|audio)/[^?]+|
+ popup(?P<type_2>video|audio)player
+ )
+ \?.*?\b(?:id=|urn=urn:[^:]+:video:)(?P<id>[0-9a-f\-]{36}|\d+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+ 'md5': '6db2226ba97f62ad42ce09783680046c',
+ 'info_dict': {
+ 'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+ 'ext': 'mp4',
+ 'upload_date': '20130701',
+ 'title': 'Snowden beantragt Asyl in Russland',
+ 'timestamp': 1372708215,
+ 'duration': 113.827,
+ 'thumbnail': r're:^https?://.*1383719781\.png$',
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'http://www.rtr.ch/play/radio/actualitad/audio/saira-tujetsch-tuttina-cuntinuar-cun-sedrun-muster-turissem?id=63cb0778-27f8-49af-9284-8c7a8c6d15fc',
+ 'info_dict': {
+ 'id': '63cb0778-27f8-49af-9284-8c7a8c6d15fc',
+ 'ext': 'mp3',
+ 'upload_date': '20151013',
+ 'title': 'Saira: Tujetsch - tuttina cuntinuar cun Sedrun Mustér Turissem',
+ 'timestamp': 1444709160,
+ 'duration': 336.816,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260',
+ 'md5': '67a2a9ae4e8e62a68d0e9820cc9782df',
+ 'info_dict': {
+ 'id': '6348260',
+ 'display_id': '6348260',
+ 'ext': 'mp4',
+ 'duration': 1796.76,
+ 'title': 'Le 19h30',
+ 'upload_date': '20141201',
+ 'timestamp': 1417458600,
+ 'thumbnail': r're:^https?://.*\.image',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://play.swissinfo.ch/play/tv/business/video/why-people-were-against-tax-reforms?id=42960270',
+ 'info_dict': {
+ 'id': '42960270',
+ 'ext': 'mp4',
+ 'title': 'Why people were against tax reforms',
+ 'description': 'md5:7ac442c558e9630e947427469c4b824d',
+ 'duration': 94.0,
+ 'upload_date': '20170215',
+ 'timestamp': 1487173560,
+ 'thumbnail': r're:https?://www\.swissinfo\.ch/srgscalableimage/42961964',
+ 'subtitles': 'count:9',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?urn=urn:srf:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.rts.ch/play/tv/19h30/video/le-19h30?urn=urn:rts:video:6348260',
+ 'only_matching': True,
+ }, {
+ # audio segment, has podcastSdUrl of the full episode
+ 'url': 'https://www.srf.ch/play/radio/popupaudioplayer?id=50b20dc8-f05b-4972-bf03-e438ff2833eb',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ bu = mobj.group('bu')
+ media_type = mobj.group('type') or mobj.group('type_2')
+ media_id = mobj.group('id')
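+ # business units are keyed by their first three letters ('swissinfo' -> 'swi')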
+ return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR')
diff --git a/hypervideo_dl/extractor/srmediathek.py b/hypervideo_dl/extractor/srmediathek.py
new file mode 100644
index 0000000..359dada
--- /dev/null
+++ b/hypervideo_dl/extractor/srmediathek.py
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .ard import ARDMediathekBaseIE
+from ..utils import (
+ ExtractorError,
+ get_element_by_attribute,
+)
+
+
+class SRMediathekIE(ARDMediathekBaseIE):
+ IE_NAME = 'sr:mediathek'
+ IE_DESC = 'Saarländischer Rundfunk'
+ _VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
+ 'info_dict': {
+ 'id': '28455',
+ 'ext': 'mp4',
+ 'title': 'sportarena (26.10.2014)',
+ 'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'no longer available',
+ }, {
+ 'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=37682',
+ 'info_dict': {
+ 'id': '37682',
+ 'ext': 'mp4',
+ 'title': 'Love, Cakes and Rock\'n\'Roll',
+ 'description': 'md5:18bf9763631c7d326c22603681e1123d',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ if '>Der gew&uuml;nschte Beitrag ist leider nicht mehr verf&uuml;gbar.<' in webpage:
+ raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+
+ media_collection_url = self._search_regex(
+ r'data-mediacollection-ardplayer="([^"]+)"', webpage, 'media collection url')
+ info = self._extract_media_info(media_collection_url, webpage, video_id)
+ info.update({
+ 'id': video_id,
+ 'title': get_element_by_attribute('class', 'ardplayer-title', webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ })
+ return info
diff --git a/hypervideo_dl/extractor/stanfordoc.py b/hypervideo_dl/extractor/stanfordoc.py
new file mode 100644
index 0000000..ae3dd13
--- /dev/null
+++ b/hypervideo_dl/extractor/stanfordoc.py
@@ -0,0 +1,91 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ orderedSet,
+ unescapeHTML,
+)
+
+
+class StanfordOpenClassroomIE(InfoExtractor):
+ IE_NAME = 'stanfordoc'
+ IE_DESC = 'Stanford Open ClassRoom'
+ _VALID_URL = r'https?://openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
+ _TEST = {
+ 'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
+ 'md5': '544a9468546059d4e80d76265b0443b8',
+ 'info_dict': {
+ 'id': 'PracticalUnix_intro-environment',
+ 'ext': 'mp4',
+ 'title': 'Intro Environment',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ if mobj.group('course') and mobj.group('video'): # A specific video
+ course = mobj.group('course')
+ video = mobj.group('video')
+ info = {
+ 'id': course + '_' + video,
+ 'uploader': None,
+ 'upload_date': None,
+ }
+
+ baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
+ xmlUrl = baseUrl + video + '.xml'
+ mdoc = self._download_xml(xmlUrl, info['id'])
+ try:
+ info['title'] = mdoc.findall('./title')[0].text
+ info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
+ except IndexError:
+ raise ExtractorError('Invalid metadata XML file')
+ return info
+ elif mobj.group('course'): # A course page
+ course = mobj.group('course')
+ info = {
+ 'id': course,
+ '_type': 'playlist',
+ 'uploader': None,
+ 'upload_date': None,
+ }
+
+ coursepage = self._download_webpage(
+ url, info['id'],
+ note='Downloading course info page',
+ errnote='Unable to download course info page')
+
+ info['title'] = self._html_search_regex(
+ r'<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
+
+ info['description'] = self._html_search_regex(
+ r'(?s)<description>([^<]+)</description>',
+ coursepage, 'description', fatal=False)
+
+ links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage))
+ info['entries'] = [self.url_result(
+ 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(link)
+ ) for link in links]
+ return info
+ else: # Root page
+ info = {
+ 'id': 'Stanford OpenClassroom',
+ '_type': 'playlist',
+ 'uploader': None,
+ 'upload_date': None,
+ }
+ info['title'] = info['id']
+
+ rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
+ rootpage = self._download_webpage(rootURL, info['id'],
+ errnote='Unable to download course info page')
+
+ links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage))
+ info['entries'] = [self.url_result(
+ 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(link)
+ ) for link in links]
+ return info
diff --git a/hypervideo_dl/extractor/steam.py b/hypervideo_dl/extractor/steam.py
new file mode 100644
index 0000000..a6a191c
--- /dev/null
+++ b/hypervideo_dl/extractor/steam.py
@@ -0,0 +1,149 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ ExtractorError,
+ get_element_by_class,
+ js_to_json,
+)
+
+
+class SteamIE(InfoExtractor):
+ _VALID_URL = r"""(?x)
+ https?://store\.steampowered\.com/
+ (agecheck/)?
+ (?P<urltype>video|app)/ # If the page is only for videos or for a game
+ (?P<gameID>\d+)/?
+ (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
+ |
+ https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
+ """
+ _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
+ _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
+ _TESTS = [{
+ 'url': 'http://store.steampowered.com/video/105600/',
+ 'playlist': [
+ {
+ 'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
+ 'info_dict': {
+ 'id': '2040428',
+ 'ext': 'mp4',
+ 'title': 'Terraria 1.3 Trailer',
+ 'playlist_index': 1,
+ }
+ },
+ {
+ 'md5': '911672b20064ca3263fa89650ba5a7aa',
+ 'info_dict': {
+ 'id': '2029566',
+ 'ext': 'mp4',
+ 'title': 'Terraria 1.2 Trailer',
+ 'playlist_index': 2,
+ }
+ }
+ ],
+ 'info_dict': {
+ 'id': '105600',
+ 'title': 'Terraria',
+ },
+ 'params': {
+ 'playlistend': 2,
+ }
+ }, {
+ 'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
+ 'info_dict': {
+ 'id': 'X8kpJBlzD2E',
+ 'ext': 'mp4',
+ 'upload_date': '20140617',
+ 'title': 'FRONTIERS - Trapping',
+ 'description': 'md5:bf6f7f773def614054089e5769c12a6e',
+ 'uploader': 'AAD Productions',
+ 'uploader_id': 'AtomicAgeDogGames',
+ }
+ }]
+
+ def _real_extract(self, url):
+ m = re.match(self._VALID_URL, url)
+ fileID = m.group('fileID')
+ if fileID:
+ videourl = url
+ playlist_id = fileID
+ else:
+ gameID = m.group('gameID')
+ playlist_id = gameID
+ videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
+
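+ # pre-set the mature content cookie so age-gated store pages render directly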
+ self._set_cookie('steampowered.com', 'mature_content', '1')
+
+ webpage = self._download_webpage(videourl, playlist_id)
+
+ if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
+ videourl = self._AGECHECK_TEMPLATE % playlist_id
+ self.report_age_confirmation()
+ webpage = self._download_webpage(videourl, playlist_id)
+
+ flash_vars = self._parse_json(self._search_regex(
+ r'(?s)rgMovieFlashvars\s*=\s*({.+?});', webpage,
+ 'flash vars'), playlist_id, js_to_json)
+
+ playlist_title = None
+ entries = []
+ if fileID:
+ playlist_title = get_element_by_class('workshopItemTitle', webpage)
+ for movie in flash_vars.values():
+ if not movie:
+ continue
+ youtube_id = movie.get('YOUTUBE_VIDEO_ID')
+ if not youtube_id:
+ continue
+ entries.append({
+ '_type': 'url',
+ 'url': youtube_id,
+ 'ie_key': 'Youtube',
+ })
+ else:
+ playlist_title = get_element_by_class('apphub_AppName', webpage)
+ for movie_id, movie in flash_vars.items():
+ if not movie:
+ continue
+ video_id = self._search_regex(r'movie_(\d+)', movie_id, 'video id', fatal=False)
+ title = movie.get('MOVIE_NAME')
+ if not title or not video_id:
+ continue
+ entry = {
+ 'id': video_id,
+ 'title': title.replace('+', ' '),
+ }
+ formats = []
+ flv_url = movie.get('FILENAME')
+ if flv_url:
+ formats.append({
+ 'format_id': 'flv',
+ 'url': flv_url,
+ })
+ highlight_element = self._search_regex(
+ r'(<div[^>]+id="highlight_movie_%s"[^>]+>)' % video_id,
+ webpage, 'highlight element', fatal=False)
+ if highlight_element:
+ highlight_attribs = extract_attributes(highlight_element)
+ if highlight_attribs:
+ entry['thumbnail'] = highlight_attribs.get('data-poster')
+ for quality in ('', '-hd'):
+ for ext in ('webm', 'mp4'):
+ video_url = highlight_attribs.get('data-%s%s-source' % (ext, quality))
+ if video_url:
+ formats.append({
+ 'format_id': ext + quality,
+ 'url': video_url,
+ })
+ if not formats:
+ continue
+ entry['formats'] = formats
+ entries.append(entry)
+ if not entries:
+ raise ExtractorError('Could not find any videos')
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/hypervideo_dl/extractor/stitcher.py b/hypervideo_dl/extractor/stitcher.py
new file mode 100644
index 0000000..8227825
--- /dev/null
+++ b/hypervideo_dl/extractor/stitcher.py
@@ -0,0 +1,144 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ clean_podcast_url,
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+ url_or_none,
+)
+
+
+class StitcherBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/'
+
+ def _call_api(self, path, video_id, query):
+ resp = self._download_json(
+ 'https://api.prod.stitcher.com/' + path,
+ video_id, query=query)
+ error_message = try_get(resp, lambda x: x['errors'][0]['message'])
+ if error_message:
+ raise ExtractorError(error_message, expected=True)
+ return resp['data']
+
+ def _extract_description(self, data):
+ return clean_html(data.get('html_description') or data.get('description'))
+
+ def _extract_audio_url(self, episode):
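+ # fall back to the guid, which for some shows doubles as the direct audio URL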
+ return url_or_none(episode.get('audio_url') or episode.get('guid'))
+
+ def _extract_show_info(self, show):
+ return {
+ 'thumbnail': show.get('image_base_url'),
+ 'series': show.get('title'),
+ }
+
+ def _extract_episode(self, episode, audio_url, show_info):
+ info = {
+ 'id': compat_str(episode['id']),
+ 'display_id': episode.get('slug'),
+ 'title': episode['title'].strip(),
+ 'description': self._extract_description(episode),
+ 'duration': int_or_none(episode.get('duration')),
+ 'url': clean_podcast_url(audio_url),
+ 'vcodec': 'none',
+ 'timestamp': int_or_none(episode.get('date_published')),
+ 'season_number': int_or_none(episode.get('season')),
+ 'season_id': str_or_none(episode.get('season_id')),
+ }
+ info.update(show_info)
+ return info
+
+
+class StitcherIE(StitcherBaseIE):
+ _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
+ 'md5': 'e9635098e0da10b21a0e2b85585530f6',
+ 'info_dict': {
+ 'id': '40789481',
+ 'ext': 'mp3',
+ 'title': 'Machine Learning Mastery and Cancer Clusters',
+ 'description': 'md5:547adb4081864be114ae3831b4c2b42f',
+ 'duration': 1604,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20151008',
+ 'timestamp': 1444285800,
+ 'series': 'Talking Machines',
+ },
+ }, {
+ 'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
+ 'info_dict': {
+ 'id': '40846275',
+ 'display_id': 'the-rare-hourlong-comedy-plus',
+ 'ext': 'mp3',
+ 'title': "The CW's 'Crazy Ex-Girlfriend'",
+ 'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
+ 'duration': 2235,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Page Not Found',
+ }, {
+ # escaped title
+ 'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+ data = self._call_api(
+ 'shows/episodes', audio_id, {'episode_ids': audio_id})
+ episode = data['episodes'][0]
+ audio_url = self._extract_audio_url(episode)
+ if not audio_url:
+ self.raise_login_required()
+ show = try_get(data, lambda x: x['shows'][0], dict) or {}
+ return self._extract_episode(
+ episode, audio_url, self._extract_show_info(show))
+
+
+class StitcherShowIE(StitcherBaseIE):
+ _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)'
+ _TESTS = [{
+ 'url': 'http://www.stitcher.com/podcast/the-talking-machines',
+ 'info_dict': {
+ 'id': 'the-talking-machines',
+ 'title': 'Talking Machines',
+ 'description': 'md5:831f0995e40f26c10231af39cf1ebf0b',
+ },
+ 'playlist_mincount': 106,
+ }, {
+ 'url': 'https://www.stitcher.com/show/the-talking-machines',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ show_slug = self._match_id(url)
+ data = self._call_api(
+ 'search/show/%s/allEpisodes' % show_slug, show_slug, {'count': 10000})
+ show = try_get(data, lambda x: x['shows'][0], dict) or {}
+ show_info = self._extract_show_info(show)
+
+ entries = []
+ for episode in (data.get('episodes') or []):
+ audio_url = self._extract_audio_url(episode)
+ if not audio_url:
+ continue
+ entries.append(self._extract_episode(episode, audio_url, show_info))
+
+ return self.playlist_result(
+ entries, show_slug, show.get('title'),
+ self._extract_description(show))
diff --git a/hypervideo_dl/extractor/storyfire.py b/hypervideo_dl/extractor/storyfire.py
new file mode 100644
index 0000000..9c69862
--- /dev/null
+++ b/hypervideo_dl/extractor/storyfire.py
@@ -0,0 +1,151 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import functools
+
+from .common import InfoExtractor
+from ..utils import (
+ # HEADRequest,
+ int_or_none,
+ OnDemandPagedList,
+ smuggle_url,
+)
+
+
+class StoryFireBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?storyfire\.com/'
+
+ def _call_api(self, path, video_id, resource, query=None):
+ return self._download_json(
+ 'https://storyfire.com/app/%s/%s' % (path, video_id), video_id,
+ 'Downloading %s JSON metadata' % resource, query=query)
+
+ def _parse_video(self, video):
+ title = video['title']
+ vimeo_id = self._search_regex(
+ r'https?://player\.vimeo\.com/external/(\d+)',
+ video['vimeoVideoURL'], 'vimeo id')
+
+ # video_url = self._request_webpage(
+ # HEADRequest(video['vimeoVideoURL']), video_id).geturl()
+ # formats = []
+ # for v_url, suffix in [(video_url, '_sep'), (video_url.replace('/sep/video/', '/video/'), '')]:
+ # formats.extend(self._extract_m3u8_formats(
+ # v_url, video_id, 'mp4', 'm3u8_native',
+ # m3u8_id='hls' + suffix, fatal=False))
+ # formats.extend(self._extract_mpd_formats(
+ # v_url.replace('.m3u8', '.mpd'), video_id,
+ # mpd_id='dash' + suffix, fatal=False))
+ # self._sort_formats(formats)
+
+ uploader_id = video.get('hostID')
+
+ return {
+ '_type': 'url_transparent',
+ 'id': vimeo_id,
+ 'title': title,
+ 'description': video.get('description'),
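+ # Vimeo embeds are restricted to storyfire.com, so smuggle the Referer
+ # through to the Vimeo extractor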
+ 'url': smuggle_url(
+ 'https://player.vimeo.com/video/' + vimeo_id, {
+ 'http_headers': {
+ 'Referer': 'https://storyfire.com/',
+ }
+ }),
+ # 'formats': formats,
+ 'thumbnail': video.get('storyImage'),
+ 'view_count': int_or_none(video.get('views')),
+ 'like_count': int_or_none(video.get('likesCount')),
+ 'comment_count': int_or_none(video.get('commentsCount')),
+ 'duration': int_or_none(video.get('videoDuration')),
+ 'timestamp': int_or_none(video.get('publishDate')),
+ 'uploader': video.get('username'),
+ 'uploader_id': uploader_id,
+ 'uploader_url': 'https://storyfire.com/user/%s/video' % uploader_id if uploader_id else None,
+ 'episode_number': int_or_none(video.get('episodeNumber') or video.get('episode_number')),
+ }
+
+
+class StoryFireIE(StoryFireBaseIE):
+ _VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'video-details/(?P<id>[0-9a-f]{24})'
+ _TEST = {
+ 'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181',
+ 'md5': 'caec54b9e4621186d6079c7ec100c1eb',
+ 'info_dict': {
+ 'id': '378954662',
+ 'ext': 'mp4',
+ 'title': 'Buzzfeed Teaches You About Memes',
+ 'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
+ 'timestamp': 1576129028,
+ 'description': 'md5:0b4e28021548e144bed69bb7539e62ea',
+ 'uploader': 'whang!',
+ 'upload_date': '20191212',
+ 'duration': 418,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata']
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video = self._call_api(
+ 'generic/video-detail', video_id, 'video')['video']
+ return self._parse_video(video)
+
+
+class StoryFireUserIE(StoryFireBaseIE):
+ _VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'user/(?P<id>[^/]+)/video'
+ _TEST = {
+ 'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video',
+ 'info_dict': {
+ 'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2',
+ },
+ 'playlist_mincount': 151,
+ }
+ _PAGE_SIZE = 20
+
+ def _fetch_page(self, user_id, page):
+ videos = self._call_api(
+ 'publicVideos', user_id, 'page %d' % (page + 1), {
+ 'skip': page * self._PAGE_SIZE,
+ })['videos']
+ for video in videos:
+ yield self._parse_video(video)
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, user_id), self._PAGE_SIZE)
+ return self.playlist_result(entries, user_id)
+
+
+class StoryFireSeriesIE(StoryFireBaseIE):
+ _VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'write/series/stories/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/',
+ 'info_dict': {
+ 'id': '-Lq6MsuIHLODO6d2dDkr',
+ },
+ 'playlist_mincount': 13,
+ }, {
+ 'url': 'https://storyfire.com/write/series/stories/the_mortal_one/',
+ 'info_dict': {
+ 'id': 'the_mortal_one',
+ },
+ 'playlist_count': 0,
+ }]
+
+ def _extract_videos(self, stories):
+ for story in stories.values():
+ if story.get('hasVideo'):
+ yield self._parse_video(story)
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+ stories = self._call_api(
+ 'seriesStories', series_id, 'series stories')
+ return self.playlist_result(self._extract_videos(stories), series_id)
diff --git a/hypervideo_dl/extractor/streamable.py b/hypervideo_dl/extractor/streamable.py
new file mode 100644
index 0000000..3472527
--- /dev/null
+++ b/hypervideo_dl/extractor/streamable.py
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+)
+
+
+class StreamableIE(InfoExtractor):
+ _VALID_URL = r'https?://streamable\.com/(?:[es]/)?(?P<id>\w+)'
+ _TESTS = [
+ {
+ 'url': 'https://streamable.com/dnd1',
+ 'md5': '3e3bc5ca088b48c2d436529b64397fef',
+ 'info_dict': {
+ 'id': 'dnd1',
+ 'ext': 'mp4',
+ 'title': 'Mikel Oiarzabal scores to make it 0-3 for La Real against Espanyol',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'uploader': 'teabaker',
+ 'timestamp': 1454964157.35115,
+ 'upload_date': '20160208',
+ 'duration': 61.516,
+ 'view_count': int,
+ }
+ },
+ # older video without bitrate, width/height, etc. info
+ {
+ 'url': 'https://streamable.com/moo',
+ 'md5': '2cf6923639b87fba3279ad0df3a64e73',
+ 'info_dict': {
+ 'id': 'moo',
+ 'ext': 'mp4',
+ 'title': '"Please don\'t eat me!"',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'timestamp': 1426115495,
+ 'upload_date': '20150311',
+ 'duration': 12,
+ 'view_count': int,
+ }
+ },
+ {
+ 'url': 'https://streamable.com/e/dnd1',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://streamable.com/s/okkqk/drxjds',
+ 'only_matching': True,
+ }
+ ]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src=(?P<q1>[\'"])(?P<src>(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)',
+ webpage)
+ if mobj:
+ return mobj.group('src')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ # Note: Using the ajax API, as the public Streamable API doesn't always
+ # return video info such as the title, and doesn't include info like
+ # the video duration
+ video = self._download_json(
+ 'https://ajax.streamable.com/videos/%s' % video_id, video_id)
+
+ # Format IDs:
+ # 0 The video is being uploaded
+ # 1 The video is being processed
+ # 2 The video has at least one file ready
+ # 3 The video is unavailable due to an error
+ status = video.get('status')
+ if status != 2:
+ raise ExtractorError(
+ 'This video is currently unavailable. It may still be uploading or processing.',
+ expected=True)
+
+ title = video.get('reddit_title') or video['title']
+
+ formats = []
+ for key, info in video['files'].items():
+ if not info.get('url'):
+ continue
+ formats.append({
+ 'format_id': key,
+ 'url': self._proto_relative_url(info['url']),
+ 'width': int_or_none(info.get('width')),
+ 'height': int_or_none(info.get('height')),
+ 'filesize': int_or_none(info.get('size')),
+ 'fps': int_or_none(info.get('framerate')),
+ 'vbr': float_or_none(info.get('bitrate'), 1000)
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': self._proto_relative_url(video.get('thumbnail_url')),
+ 'uploader': video.get('owner', {}).get('user_name'),
+ 'timestamp': float_or_none(video.get('date_added')),
+ 'duration': float_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('plays')),
+ 'formats': formats
+ }
diff --git a/hypervideo_dl/extractor/streamcloud.py b/hypervideo_dl/extractor/streamcloud.py
new file mode 100644
index 0000000..984dea4
--- /dev/null
+++ b/hypervideo_dl/extractor/streamcloud.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ urlencode_postdata,
+)
+
+
+class StreamcloudIE(InfoExtractor):
+ IE_NAME = 'streamcloud.eu'
+ _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?'
+
+ _TESTS = [{
+ 'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube_dl_test_video_____________-BaW_jenozKc.mp4.html',
+ 'md5': '6bea4c7fa5daaacc2a946b7146286686',
+ 'info_dict': {
+ 'id': 'skp9j99s4bpz',
+ 'ext': 'mp4',
+ 'title': 'hypervideo test video \'/\\ ä ↭',
+ },
+ 'skip': 'Only available from the EU'
+ }, {
+ 'url': 'http://streamcloud.eu/ua8cmfh1nbe6/NSHIP-148--KUC-NG--H264-.mp4.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ url = 'http://streamcloud.eu/%s' % video_id
+
+ orig_webpage = self._download_webpage(url, video_id)
+
+ if '>File Not Found<' in orig_webpage:
+ raise ExtractorError(
+ 'Video %s does not exist' % video_id, expected=True)
+
+ fields = re.findall(r'''(?x)<input\s+
+ type="(?:hidden|submit)"\s+
+ name="([^"]+)"\s+
+ (?:id="[^"]+"\s+)?
+ value="([^"]*)"
+ ''', orig_webpage)
+
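+        # streamcloud.eu apparently enforces a short countdown before the
+        # hidden form may be re-submitted, hence the wait below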
+ self._sleep(6, video_id)
+
+ webpage = self._download_webpage(
+ url, video_id, data=urlencode_postdata(fields), headers={
+ b'Content-Type': b'application/x-www-form-urlencoded',
+ })
+
+ try:
+ title = self._html_search_regex(
+ r'<h1[^>]*>([^<]+)<', webpage, 'title')
+ video_url = self._search_regex(
+ r'file:\s*"([^"]+)"', webpage, 'video URL')
+ except ExtractorError:
+ message = self._html_search_regex(
+ r'(?s)<div[^>]+class=(["\']).*?msgboxinfo.*?\1[^>]*>(?P<message>.+?)</div>',
+ webpage, 'message', default=None, group='message')
+ if message:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+ raise
+ thumbnail = self._search_regex(
+ r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ 'http_headers': {
+ 'Referer': url,
+ },
+ }
diff --git a/hypervideo_dl/extractor/streamcz.py b/hypervideo_dl/extractor/streamcz.py
new file mode 100644
index 0000000..58e0b4c
--- /dev/null
+++ b/hypervideo_dl/extractor/streamcz.py
@@ -0,0 +1,105 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ sanitized_Request,
+)
+
+
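+# The Api-Password header is an MD5 digest of a static key, the API path and
+# the current day number (Unix time in days), so the derived key only changes
+# once per day. For example (day number assumed for illustration):
+#   md5('fb5f58a820353bd7095de526253c14fd' + '/episode/765767' + '18790')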
+def _get_api_key(api_path):
+ if api_path.endswith('?'):
+ api_path = api_path[:-1]
+
+ api_key = 'fb5f58a820353bd7095de526253c14fd'
+    a = '{0}{1}{2}'.format(api_key, api_path, int(round(time.time() / 24 / 3600)))
+ return hashlib.md5(a.encode('ascii')).hexdigest()
+
+
+class StreamCZIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)'
+ _API_URL = 'http://www.stream.cz/API'
+
+ _TESTS = [{
+ 'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
+ 'md5': '934bb6a6d220d99c010783c9719960d5',
+ 'info_dict': {
+ 'id': '765767',
+ 'ext': 'mp4',
+ 'title': 'Peklo na talíři: Éčka pro děti',
+ 'description': 'Taška s grónskou pomazánkou a další pekelnosti ZDE',
+ 'thumbnail': 're:^http://im.stream.cz/episode/52961d7e19d423f8f06f0100',
+ 'duration': 256,
+ },
+ }, {
+ 'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
+ 'md5': '849a88c1e1ca47d41403c2ba5e59e261',
+ 'info_dict': {
+ 'id': '10002447',
+ 'ext': 'mp4',
+ 'title': 'Kancelář Blaník: Tři roky pro Mazánka',
+ 'description': 'md5:3862a00ba7bf0b3e44806b544032c859',
+ 'thumbnail': 're:^http://im.stream.cz/episode/537f838c50c11f8d21320000',
+ 'duration': 368,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ api_path = '/episode/%s' % video_id
+
+ req = sanitized_Request(self._API_URL + api_path)
+ req.add_header('Api-Password', _get_api_key(api_path))
+ data = self._download_json(req, video_id)
+
+ formats = []
+ for quality, video in enumerate(data['video_qualities']):
+ for f in video['formats']:
+ typ = f['type'].partition('/')[2]
+ qlabel = video.get('quality_label')
+ formats.append({
+ 'format_note': '%s-%s' % (qlabel, typ) if qlabel else typ,
+ 'format_id': '%s-%s' % (typ, f['quality']),
+ 'url': f['source'],
+ 'height': int_or_none(f['quality'].rstrip('p')),
+ 'quality': quality,
+ })
+ self._sort_formats(formats)
+
+ image = data.get('image')
+ if image:
+ thumbnail = self._proto_relative_url(
+ image.replace('{width}', '1240').replace('{height}', '697'),
+ scheme='http:',
+ )
+ else:
+ thumbnail = None
+
+ stream = data.get('_embedded', {}).get('stream:show', {}).get('name')
+ if stream:
+ title = '%s: %s' % (stream, data['name'])
+ else:
+ title = data['name']
+
+ subtitles = {}
+ srt_url = data.get('subtitles_srt')
+ if srt_url:
+ subtitles['cs'] = [{
+ 'ext': 'srt',
+ 'url': srt_url,
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ 'description': data.get('web_site_text'),
+ 'duration': int_or_none(data.get('duration')),
+ 'view_count': int_or_none(data.get('views')),
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/streetvoice.py b/hypervideo_dl/extractor/streetvoice.py
new file mode 100644
index 0000000..f21681a
--- /dev/null
+++ b/hypervideo_dl/extractor/streetvoice.py
@@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ str_or_none,
+ strip_or_none,
+ try_get,
+ urljoin,
+)
+
+
+class StreetVoiceIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://streetvoice.com/skippylu/songs/123688/',
+ 'md5': '0eb535970629a5195685355f3ed60bfd',
+ 'info_dict': {
+ 'id': '123688',
+ 'ext': 'mp3',
+ 'title': '流浪',
+ 'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 270,
+ 'upload_date': '20100923',
+ 'uploader': 'Crispy脆樂團',
+ 'uploader_id': '627810',
+ 'uploader_url': 're:^https?://streetvoice.com/skippylu/',
+ 'timestamp': 1285261661,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'track': '流浪',
+ 'track_id': '123688',
+ 'album': '2010',
+ }
+ }, {
+ 'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ song_id = self._match_id(url)
+ base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id
+ song = self._download_json(base_url, song_id, query={
+ 'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username',
+ })
+ title = song['name']
+
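+        # Each sub-endpoint is POSTed with an empty body and, when the format
+        # is available, appears to respond with JSON like {"file": "<URL>"}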
+ formats = []
+ for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]:
+ f_url = (self._download_json(
+ base_url + suffix + '/', song_id,
+ 'Downloading %s format URL' % format_id,
+ data=b'', fatal=False) or {}).get('file')
+ if not f_url:
+ continue
+ f = {
+ 'ext': 'mp3',
+ 'format_id': format_id,
+ 'url': f_url,
+ 'vcodec': 'none',
+ }
+ if format_id == 'hls':
+ f['protocol'] = 'm3u8_native'
+ abr = self._search_regex(r'\.mp3\.(\d+)k', f_url, 'bitrate', default=None)
+ if abr:
+ abr = int(abr)
+ f.update({
+ 'abr': abr,
+ 'tbr': abr,
+ })
+ formats.append(f)
+
+ user = song.get('user') or {}
+ username = user.get('username')
+ get_count = lambda x: int_or_none(song.get(x + '_count'))
+
+ return {
+ 'id': song_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': strip_or_none(song.get('synopsis')),
+ 'thumbnail': song.get('image'),
+ 'duration': int_or_none(song.get('length')),
+ 'timestamp': parse_iso8601(song.get('created_at')),
+ 'uploader': try_get(user, lambda x: x['profile']['nickname']),
+ 'uploader_id': str_or_none(user.get('id')),
+ 'uploader_url': urljoin(url, '/%s/' % username) if username else None,
+ 'view_count': get_count('plays'),
+ 'like_count': get_count('likes'),
+ 'comment_count': get_count('comments'),
+ 'repost_count': get_count('share'),
+ 'track': title,
+ 'track_id': song_id,
+ 'album': try_get(song, lambda x: x['album']['name']),
+ }
diff --git a/hypervideo_dl/extractor/stretchinternet.py b/hypervideo_dl/extractor/stretchinternet.py
new file mode 100644
index 0000000..ec08eae
--- /dev/null
+++ b/hypervideo_dl/extractor/stretchinternet.py
@@ -0,0 +1,37 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class StretchInternetIE(InfoExtractor):
+ _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/(?:portal|full)\.htm\?.*?\beventId=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=573272&streamType=video',
+ 'info_dict': {
+ 'id': '573272',
+ 'ext': 'mp4',
+ 'title': 'UNIVERSITY OF MARY WRESTLING VS UPPER IOWA',
+ # 'timestamp': 1575668361,
+ # 'upload_date': '20191206',
+ 'uploader_id': '99997',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
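+        # the event endpoint returns the stream URL without a scheme,
+        # hence the explicit https:// prefix when building the result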
+ media_url = self._download_json(
+ 'https://core.stretchlive.com/trinity/event/tcg/' + video_id,
+ video_id)[0]['media'][0]['url']
+ event = self._download_json(
+ 'https://neo-client.stretchinternet.com/portal-ws/getEvent.json',
+ video_id, query={'eventID': video_id, 'token': 'asdf'})['event']
+
+ return {
+ 'id': video_id,
+ 'title': event['title'],
+ # TODO: parse US timezone abbreviations
+ # 'timestamp': event.get('dateTimeString'),
+ 'url': 'https://' + media_url,
+ 'uploader_id': event.get('ownerID'),
+ }
diff --git a/hypervideo_dl/extractor/stv.py b/hypervideo_dl/extractor/stv.py
new file mode 100644
index 0000000..539220a
--- /dev/null
+++ b/hypervideo_dl/extractor/stv.py
@@ -0,0 +1,95 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    smuggle_url,
+    str_or_none,
+    try_get,
+)
+
+
+class STVPlayerIE(InfoExtractor):
+ IE_NAME = 'stv:player'
+ _VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})'
+ _TESTS = [{
+ # shortform
+ 'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/',
+ 'md5': '5adf9439c31d554f8be0707c7abe7e0a',
+ 'info_dict': {
+ 'id': '5333973339001',
+ 'ext': 'mp4',
+ 'upload_date': '20170301',
+ 'title': '60 seconds on set with Laura Norton',
+ 'description': "How many questions can Laura - a.k.a Kerry Wyatt - answer in 60 seconds? Let\'s find out!",
+ 'timestamp': 1488388054,
+ 'uploader_id': '1486976045',
+ },
+ 'skip': 'this resource is unavailable outside of the UK',
+ }, {
+ # episodes
+ 'url': 'https://player.stv.tv/episode/4125/jennifer-saunders-memory-lane',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s'
+ _PTYPE_MAP = {
+ 'episode': 'episodes',
+ 'video': 'shortform',
+ }
+
+ def _real_extract(self, url):
+ ptype, video_id = re.match(self._VALID_URL, url).groups()
+
+ webpage = self._download_webpage(url, video_id, fatal=False) or ''
+ props = (self._parse_json(self._search_regex(
+ r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
+ webpage, 'next data', default='{}'), video_id,
+ fatal=False) or {}).get('props') or {}
+ player_api_cache = try_get(
+ props, lambda x: x['initialReduxState']['playerApiCache']) or {}
+
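+        # Prefer an API response already embedded in the page's Redux cache;
+        # otherwise derive the API path from the URL type and fetch it below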
+ api_path, resp = None, {}
+ for k, v in player_api_cache.items():
+ if k.startswith('/episodes/') or k.startswith('/shortform/'):
+ api_path, resp = k, v
+ break
+ else:
+ episode_id = str_or_none(try_get(
+ props, lambda x: x['pageProps']['episodeId']))
+ api_path = '/%s/%s' % (self._PTYPE_MAP[ptype], episode_id or video_id)
+
+ result = resp.get('results')
+ if not result:
+ resp = self._download_json(
+ 'https://player.api.stv.tv/v1' + api_path, video_id)
+ result = resp['results']
+
+ video = result['video']
+ video_id = compat_str(video['id'])
+
+ subtitles = {}
+ _subtitles = result.get('_subtitles') or {}
+ for ext, sub_url in _subtitles.items():
+ subtitles.setdefault('en', []).append({
+ 'ext': 'vtt' if ext == 'webvtt' else ext,
+ 'url': sub_url,
+ })
+
+ programme = result.get('programme') or {}
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['GB']}),
+ 'description': result.get('summary'),
+ 'duration': float_or_none(video.get('length'), 1000),
+ 'subtitles': subtitles,
+ 'view_count': int_or_none(result.get('views')),
+ 'series': programme.get('name') or programme.get('shortName'),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/hypervideo_dl/extractor/sunporno.py b/hypervideo_dl/extractor/sunporno.py
new file mode 100644
index 0000000..6805116
--- /dev/null
+++ b/hypervideo_dl/extractor/sunporno.py
@@ -0,0 +1,79 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ int_or_none,
+ qualities,
+ determine_ext,
+)
+
+
+class SunPornoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?sunporno\.com/videos|embeds\.sunporno\.com/embed)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.sunporno.com/videos/807778/',
+ 'md5': '507887e29033502f29dba69affeebfc9',
+ 'info_dict': {
+ 'id': '807778',
+ 'ext': 'mp4',
+ 'title': 'md5:0a400058e8105d39e35c35e7c5184164',
+ 'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 302,
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'http://embeds.sunporno.com/embed/807778',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://www.sunporno.com/videos/%s' % video_id, video_id)
+
+ title = self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title')
+ description = self._html_search_meta(
+ 'description', webpage, 'description')
+ thumbnail = self._html_search_regex(
+ r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
+
+ duration = parse_duration(self._search_regex(
+ (r'itemprop="duration"[^>]*>\s*(\d+:\d+)\s*<',
+ r'>Duration:\s*<span[^>]+>\s*(\d+:\d+)\s*<'),
+ webpage, 'duration', fatal=False))
+
+ view_count = int_or_none(self._html_search_regex(
+ r'class="views">(?:<noscript>)?\s*(\d+)\s*<',
+ webpage, 'view count', fatal=False))
+ comment_count = int_or_none(self._html_search_regex(
+ r'(\d+)</b> Comments?',
+ webpage, 'comment count', fatal=False, default=None))
+
+ formats = []
+ quality = qualities(['mp4', 'flv'])
+ for video_url in re.findall(r'<(?:source|video) src="([^"]+)"', webpage):
+ video_ext = determine_ext(video_url)
+ formats.append({
+ 'url': video_url,
+ 'format_id': video_ext,
+ 'quality': quality(video_ext),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ 'age_limit': 18,
+ }
diff --git a/hypervideo_dl/extractor/sverigesradio.py b/hypervideo_dl/extractor/sverigesradio.py
new file mode 100644
index 0000000..aa0691f
--- /dev/null
+++ b/hypervideo_dl/extractor/sverigesradio.py
@@ -0,0 +1,115 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ str_or_none,
+)
+
+
+class SverigesRadioBaseIE(InfoExtractor):
+ _BASE_URL = 'https://sverigesradio.se/sida/playerajax/'
+ _QUALITIES = ['low', 'medium', 'high']
+ _EXT_TO_CODEC_MAP = {
+ 'mp3': 'mp3',
+ 'm4a': 'aac',
+ }
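+    # audio bitrates (in kbps) observed for the codingFormat values returned
+    # by the playerajax API; used when no bitrate can be read from the URL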
+ _CODING_FORMAT_TO_ABR_MAP = {
+ 5: 128,
+ 11: 192,
+ 12: 32,
+ 13: 96,
+ }
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+ query = {
+ 'id': audio_id,
+ 'type': self._AUDIO_TYPE,
+ }
+
+ item = self._download_json(
+ self._BASE_URL + 'audiometadata', audio_id,
+ 'Downloading audio JSON metadata', query=query)['items'][0]
+ title = item['subtitle']
+
+ query['format'] = 'iis'
+ urls = []
+ formats = []
+ for quality in self._QUALITIES:
+ query['quality'] = quality
+ audio_url_data = self._download_json(
+ self._BASE_URL + 'getaudiourl', audio_id,
+ 'Downloading %s format JSON metadata' % quality,
+ fatal=False, query=query) or {}
+ audio_url = audio_url_data.get('audioUrl')
+ if not audio_url or audio_url in urls:
+ continue
+ urls.append(audio_url)
+ ext = determine_ext(audio_url)
+ coding_format = audio_url_data.get('codingFormat')
+ abr = int_or_none(self._search_regex(
+ r'_a(\d+)\.m4a', audio_url, 'audio bitrate',
+ default=None)) or self._CODING_FORMAT_TO_ABR_MAP.get(coding_format)
+ formats.append({
+ 'abr': abr,
+ 'acodec': self._EXT_TO_CODEC_MAP.get(ext),
+ 'ext': ext,
+ 'format_id': str_or_none(coding_format),
+ 'vcodec': 'none',
+ 'url': audio_url,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': audio_id,
+ 'title': title,
+ 'formats': formats,
+ 'series': item.get('title'),
+ 'duration': int_or_none(item.get('duration')),
+ 'thumbnail': item.get('displayimageurl'),
+ 'description': item.get('description'),
+ }
+
+
+class SverigesRadioPublicationIE(SverigesRadioBaseIE):
+ IE_NAME = 'sverigesradio:publication'
+ _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/sida/(?:artikel|gruppsida)\.aspx\?.*?\bartikel=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://sverigesradio.se/sida/artikel.aspx?programid=83&artikel=7038546',
+ 'md5': '6a4917e1923fccb080e5a206a5afa542',
+ 'info_dict': {
+ 'id': '7038546',
+ 'ext': 'm4a',
+ 'duration': 132,
+ 'series': 'Nyheter (Ekot)',
+ 'title': 'Esa Teittinen: Sanningen har inte kommit fram',
+ 'description': 'md5:daf7ce66a8f0a53d5465a5984d3839df',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'https://sverigesradio.se/sida/gruppsida.aspx?programid=3304&grupp=6247&artikel=7146887',
+ 'only_matching': True,
+ }]
+ _AUDIO_TYPE = 'publication'
+
+
+class SverigesRadioEpisodeIE(SverigesRadioBaseIE):
+ IE_NAME = 'sverigesradio:episode'
+ _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://sverigesradio.se/avsnitt/1140922?programid=1300',
+ 'md5': '20dc4d8db24228f846be390b0c59a07c',
+ 'info_dict': {
+ 'id': '1140922',
+ 'ext': 'mp3',
+ 'duration': 3307,
+ 'series': 'Konflikt',
+ 'title': 'Metoo och valen',
+ 'description': 'md5:fcb5c1f667f00badcc702b196f10a27e',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ }
+ }
+ _AUDIO_TYPE = 'episode'
diff --git a/hypervideo_dl/extractor/svt.py b/hypervideo_dl/extractor/svt.py
new file mode 100644
index 0000000..a5bb6da
--- /dev/null
+++ b/hypervideo_dl/extractor/svt.py
@@ -0,0 +1,425 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ dict_get,
+ int_or_none,
+ unified_timestamp,
+ str_or_none,
+ strip_or_none,
+ try_get,
+)
+
+
+class SVTBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['SE']
+
+ def _extract_video(self, video_info, video_id):
+ is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
+ m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'
+ formats = []
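+        # each video reference advertises one delivery method; dispatch on the
+        # URL extension (HLS, HDS, DASH or progressive download)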
+ for vr in video_info['videoReferences']:
+ player_type = vr.get('playerType') or vr.get('format')
+ vurl = vr['url']
+ ext = determine_ext(vurl)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ vurl, video_id,
+ ext='mp4', entry_protocol=m3u8_protocol,
+ m3u8_id=player_type, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ vurl + '?hdcore=3.3.0', video_id,
+ f4m_id=player_type, fatal=False))
+ elif ext == 'mpd':
+ if player_type == 'dashhbbtv':
+ formats.extend(self._extract_mpd_formats(
+ vurl, video_id, mpd_id=player_type, fatal=False))
+ else:
+ formats.append({
+ 'format_id': player_type,
+ 'url': vurl,
+ })
+ rights = try_get(video_info, lambda x: x['rights'], dict) or {}
+ if not formats and rights.get('geoBlockedSweden'):
+ self.raise_geo_restricted(
+ 'This video is only available in Sweden',
+ countries=self._GEO_COUNTRIES)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
+ if isinstance(subtitle_references, list):
+ for sr in subtitle_references:
+ subtitle_url = sr.get('url')
+ subtitle_lang = sr.get('language', 'sv')
+ if subtitle_url:
+ if determine_ext(subtitle_url) == 'm3u8':
+ # TODO(yan12125): handle WebVTT in m3u8 manifests
+ continue
+
+ subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})
+
+ title = video_info.get('title')
+
+ series = video_info.get('programTitle')
+ season_number = int_or_none(video_info.get('season'))
+ episode = video_info.get('episodeTitle')
+ episode_number = int_or_none(video_info.get('episodeNumber'))
+
+ timestamp = unified_timestamp(rights.get('validFrom'))
+ duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
+ age_limit = None
+ adult = dict_get(
+ video_info, ('inappropriateForChildren', 'blockedForChildren'),
+ skip_false_values=False)
+ if adult is not None:
+ age_limit = 18 if adult else 0
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'age_limit': age_limit,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'is_live': is_live,
+ }
+
+
+class SVTIE(SVTBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
+ 'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
+ 'info_dict': {
+ 'id': '2900353',
+ 'ext': 'mp4',
+ 'title': 'Stjärnorna skojar till det - under SVT-intervjun',
+ 'duration': 27,
+ 'age_limit': 0,
+ },
+ }
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ widget_id = mobj.group('widget_id')
+ article_id = mobj.group('id')
+
+ info = self._download_json(
+ 'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
+ article_id)
+
+ info_dict = self._extract_video(info['video'], article_id)
+ info_dict['title'] = info['context']['title']
+ return info_dict
+
+
+class SVTPlayBaseIE(SVTBaseIE):
+ _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'
+
+
+class SVTPlayIE(SVTPlayBaseIE):
+ IE_DESC = 'SVT Play and Öppet arkiv'
+ _VALID_URL = r'''(?x)
+ (?:
+ (?:
+ svt:|
+ https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
+ )
+ (?P<svt_id>[^/?#&]+)|
+ https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
+ (?:.*?(?:modalId|id)=(?P<modal_id>[\da-zA-Z-]+))?
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://www.svtplay.se/video/30479064',
+ 'md5': '2382036fd6f8c994856c323fe51c426e',
+ 'info_dict': {
+ 'id': '8zVbDPA',
+ 'ext': 'mp4',
+ 'title': 'Designdrömmar i Stenungsund',
+ 'timestamp': 1615770000,
+ 'upload_date': '20210315',
+ 'duration': 3519,
+ 'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
+ 'age_limit': 0,
+ 'subtitles': {
+ 'sv': [{
+ 'ext': 'vtt',
+ }]
+ },
+ },
+ 'params': {
+ 'format': 'bestvideo',
+            # skip download for now: the test asserts that each segment is
+            # larger than 10000 bytes, but SVT uses init segments that are smaller
+ # AssertionError: Expected test_SVTPlay_jNwpV9P.mp4 to be at least 9.77KiB, but it's only 864.00B
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.svtplay.se/video/30479064/husdrommar/husdrommar-sasong-8-designdrommar-i-stenungsund?modalId=8zVbDPA',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.svtplay.se/video/30684086/rapport/rapport-24-apr-18-00-7?id=e72gVpa',
+ 'only_matching': True,
+ }, {
+ # geo restricted to Sweden
+ 'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.svtplay.se/kanaler/svt1',
+ 'only_matching': True,
+ }, {
+ 'url': 'svt:1376446-003A',
+ 'only_matching': True,
+ }, {
+ 'url': 'svt:14278044',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/',
+ 'only_matching': True,
+ }, {
+ 'url': 'svt:eWv5MLX',
+ 'only_matching': True,
+ }]
+
+ def _adjust_title(self, info):
+ if info['is_live']:
+ info['title'] = self._live_title(info['title'])
+
+ def _extract_by_video_id(self, video_id, webpage=None):
+ data = self._download_json(
+ 'https://api.svt.se/videoplayer-api/video/%s' % video_id,
+ video_id, headers=self.geo_verification_headers())
+ info_dict = self._extract_video(data, video_id)
+ if not info_dict.get('title'):
+ title = dict_get(info_dict, ('episode', 'series'))
+ if not title and webpage:
+ title = re.sub(
+ r'\s*\|\s*.+?$', '', self._og_search_title(webpage))
+ if not title:
+ title = video_id
+ info_dict['title'] = title
+ self._adjust_title(info_dict)
+ return info_dict
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ svt_id = mobj.group('svt_id') or mobj.group('modal_id')
+
+ if svt_id:
+ return self._extract_by_video_id(svt_id)
+
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._parse_json(
+ self._search_regex(
+ self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
+ group='json'),
+ video_id, fatal=False)
+
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ if data:
+ video_info = try_get(
+ data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
+ dict)
+ if video_info:
+ info_dict = self._extract_video(video_info, video_id)
+ info_dict.update({
+ 'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
+ 'thumbnail': thumbnail,
+ })
+ self._adjust_title(info_dict)
+ return info_dict
+
+ svt_id = try_get(
+ data, lambda x: x['statistics']['dataLake']['content']['id'],
+ compat_str)
+
+ if not svt_id:
+ svt_id = self._search_regex(
+ (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
+ r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/%s/[^"\']*\b(?:modalId|id)=([\da-zA-Z-]+)' % re.escape(video_id),
+ r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
+ r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)',
+ r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
+ r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
+ r'["\']svtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)'),
+ webpage, 'video id')
+
+ info_dict = self._extract_by_video_id(svt_id, webpage)
+ info_dict['thumbnail'] = thumbnail
+
+ return info_dict
+
+
+class SVTSeriesIE(SVTPlayBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)(?:.+?\btab=(?P<season_slug>[^&#]+))?'
+ _TESTS = [{
+ 'url': 'https://www.svtplay.se/rederiet',
+ 'info_dict': {
+ 'id': '14445680',
+ 'title': 'Rederiet',
+ 'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
+ },
+ 'playlist_mincount': 318,
+ }, {
+ 'url': 'https://www.svtplay.se/rederiet?tab=season-2-14445680',
+ 'info_dict': {
+ 'id': 'season-2-14445680',
+ 'title': 'Rederiet - Säsong 2',
+ 'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
+ },
+ 'playlist_mincount': 12,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ series_slug, season_id = re.match(self._VALID_URL, url).groups()
+
+ series = self._download_json(
+ 'https://api.svt.se/contento/graphql', series_slug,
+ 'Downloading series page', query={
+ 'query': '''{
+ listablesBySlug(slugs: ["%s"]) {
+ associatedContent(include: [productionPeriod, season]) {
+ items {
+ item {
+ ... on Episode {
+ videoSvtId
+ }
+ }
+ }
+ id
+ name
+ }
+ id
+ longDescription
+ name
+ shortDescription
+ }
+}''' % series_slug,
+ })['data']['listablesBySlug'][0]
+
+ season_name = None
+
+ entries = []
+ for season in series['associatedContent']:
+ if not isinstance(season, dict):
+ continue
+ if season_id:
+ if season.get('id') != season_id:
+ continue
+ season_name = season.get('name')
+ items = season.get('items')
+ if not isinstance(items, list):
+ continue
+ for item in items:
+ video = item.get('item') or {}
+ content_id = video.get('videoSvtId')
+ if not content_id or not isinstance(content_id, compat_str):
+ continue
+ entries.append(self.url_result(
+ 'svt:' + content_id, SVTPlayIE.ie_key(), content_id))
+
+ title = series.get('name')
+ season_name = season_name or season_id
+
+ if title and season_name:
+ title = '%s - %s' % (title, season_name)
+ elif season_id:
+ title = season_id
+
+ return self.playlist_result(
+ entries, season_id or series.get('id'), title,
+ dict_get(series, ('longDescription', 'shortDescription')))
+
+
+class SVTPageIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?svt\.se/(?P<path>(?:[^/]+/)*(?P<id>[^/?&#]+))'
+ _TESTS = [{
+ 'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
+ 'info_dict': {
+ 'id': '25298267',
+ 'title': 'Bakom masken – Lehners kamp mot mental ohälsa',
+ },
+ 'playlist_count': 4,
+ }, {
+ 'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
+ 'info_dict': {
+ 'id': '24243746',
+ 'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
+ },
+ 'playlist_count': 2,
+ }, {
+ # only programTitle
+ 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
+ 'info_dict': {
+ 'id': '8439V2K',
+ 'ext': 'mp4',
+ 'title': 'Stjärnorna skojar till det - under SVT-intervjun',
+ 'duration': 27,
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.svt.se/vader/manadskronikor/maj2018',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ path, display_id = re.match(self._VALID_URL, url).groups()
+
+ article = self._download_json(
+ 'https://api.svt.se/nss-api/page/' + path, display_id,
+ query={'q': 'articles'})['articles']['content'][0]
+
+ entries = []
+
+ def _process_content(content):
+ if content.get('_type') in ('VIDEOCLIP', 'VIDEOEPISODE'):
+ video_id = compat_str(content['image']['svtId'])
+ entries.append(self.url_result(
+ 'svt:' + video_id, SVTPlayIE.ie_key(), video_id))
+
+ for media in article.get('media', []):
+ _process_content(media)
+
+ for obj in article.get('structuredBody', []):
+ _process_content(obj.get('content') or {})
+
+ return self.playlist_result(
+ entries, str_or_none(article.get('id')),
+ strip_or_none(article.get('title')))
diff --git a/hypervideo_dl/extractor/swrmediathek.py b/hypervideo_dl/extractor/swrmediathek.py
new file mode 100644
index 0000000..0f61597
--- /dev/null
+++ b/hypervideo_dl/extractor/swrmediathek.py
@@ -0,0 +1,115 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ int_or_none,
+ determine_protocol,
+)
+
+
+class SWRMediathekIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/(?:content/)?player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+
+ _TESTS = [{
+ 'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
+ 'md5': '8c5f6f0172753368547ca8413a7768ac',
+ 'info_dict': {
+ 'id': '849790d0-dab8-11e3-a953-0026b975f2e6',
+ 'ext': 'mp4',
+ 'title': 'SWR odysso',
+ 'description': 'md5:2012e31baad36162e97ce9eb3f157b8a',
+ 'thumbnail': r're:^http:.*\.jpg$',
+ 'duration': 2602,
+ 'upload_date': '20140515',
+ 'uploader': 'SWR Fernsehen',
+ 'uploader_id': '990030',
+ },
+ }, {
+ 'url': 'http://swrmediathek.de/player.htm?show=0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
+ 'md5': 'b10ab854f912eecc5a6b55cd6fc1f545',
+ 'info_dict': {
+ 'id': '0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
+ 'ext': 'mp4',
+ 'title': 'Nachtcafé - Alltagsdroge Alkohol - zwischen Sektempfang und Komasaufen',
+ 'description': 'md5:e0a3adc17e47db2c23aab9ebc36dbee2',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'duration': 5305,
+ 'upload_date': '20140516',
+ 'uploader': 'SWR Fernsehen',
+ 'uploader_id': '990030',
+ },
+ 'skip': 'redirect to http://swrmediathek.de/index.htm?hinweis=swrlink',
+ }, {
+ 'url': 'http://swrmediathek.de/player.htm?show=bba23e10-cb93-11e3-bf7f-0026b975f2e6',
+ 'md5': '4382e4ef2c9d7ce6852535fa867a0dd3',
+ 'info_dict': {
+ 'id': 'bba23e10-cb93-11e3-bf7f-0026b975f2e6',
+ 'ext': 'mp3',
+ 'title': 'Saša Stanišic: Vor dem Fest',
+ 'description': 'md5:5b792387dc3fbb171eb709060654e8c9',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'duration': 3366,
+ 'upload_date': '20140520',
+ 'uploader': 'SWR 2',
+ 'uploader_id': '284670',
+ },
+ 'skip': 'redirect to http://swrmediathek.de/index.htm?hinweis=swrlink',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://swrmediathek.de/AjaxEntry?ekey=%s' % video_id,
+ video_id, 'Downloading video JSON')
+
+ attr = video['attr']
+ title = attr['entry_title']
+ media_type = attr.get('entry_etype')
+
+ formats = []
+ for entry in video.get('sub', []):
+ if entry.get('name') != 'entry_media':
+ continue
+
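+            # the media attributes are only numbered: val0 appears to hold the
+            # codec/container id, val1 a quality rank and val2 the media URL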
+ entry_attr = entry.get('attr', {})
+ f_url = entry_attr.get('val2')
+ if not f_url:
+ continue
+ codec = entry_attr.get('val0')
+ if codec == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ f_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif codec == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ f_url + '?hdcore=3.7.0', video_id,
+ f4m_id='hds', fatal=False))
+ else:
+ formats.append({
+ 'format_id': determine_protocol({'url': f_url}),
+ 'url': f_url,
+ 'quality': int_or_none(entry_attr.get('val1')),
+ 'vcodec': codec if media_type == 'Video' else 'none',
+ 'acodec': codec if media_type == 'Audio' else None,
+ })
+ self._sort_formats(formats)
+
+ upload_date = None
+ entry_pdatet = attr.get('entry_pdatet')
+ if entry_pdatet:
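+            # entry_pdatet appears to be 'YYYYMMDDHHMM'; keep only the date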
+ upload_date = entry_pdatet[:-4]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': attr.get('entry_descl'),
+ 'thumbnail': attr.get('entry_image_16_9'),
+ 'duration': parse_duration(attr.get('entry_durat')),
+ 'upload_date': upload_date,
+ 'uploader': attr.get('channel_title'),
+ 'uploader_id': attr.get('channel_idkey'),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/syfy.py b/hypervideo_dl/extractor/syfy.py
new file mode 100644
index 0000000..def7e5a
--- /dev/null
+++ b/hypervideo_dl/extractor/syfy.py
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+from .adobepass import AdobePassIE
+from ..utils import (
+ update_url_query,
+ smuggle_url,
+)
+
+
+class SyfyIE(AdobePassIE):
+ _VALID_URL = r'https?://(?:www\.)?syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer',
+ 'info_dict': {
+ 'id': '2968097',
+ 'ext': 'mp4',
+ 'title': 'The Internet Ruined My Life: Season 1 Trailer',
+ 'description': 'One tweet, one post, one click, can destroy everything.',
+ 'uploader': 'NBCU-MPAT',
+ 'upload_date': '20170113',
+ 'timestamp': 1484345640,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['ThePlatform'],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ syfy_mpx = list(self._parse_json(self._search_regex(
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'),
+ display_id)['syfy']['syfy_mpx'].values())[0]
+ video_id = syfy_mpx['mpxGUID']
+ title = syfy_mpx['episodeTitle']
+ query = {
+ 'mbr': 'true',
+ 'manifest': 'm3u',
+ }
+ if syfy_mpx.get('entitlement') == 'auth':
+ resource = self._get_mvpd_resource(
+ 'syfy', title, video_id,
+ syfy_mpx.get('mpxRating', 'TV-14'))
+ query['auth'] = self._extract_mvpd_auth(
+ url, video_id, 'syfy', resource)
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'url': smuggle_url(update_url_query(
+ self._proto_relative_url(syfy_mpx['releaseURL']), query),
+ {'force_smil_url': True}),
+ 'title': title,
+ 'id': video_id,
+ 'display_id': display_id,
+ }
diff --git a/hypervideo_dl/extractor/sztvhu.py b/hypervideo_dl/extractor/sztvhu.py
new file mode 100644
index 0000000..cfad331
--- /dev/null
+++ b/hypervideo_dl/extractor/sztvhu.py
@@ -0,0 +1,41 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class SztvHuIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
+ 'md5': 'a6df607b11fb07d0e9f2ad94613375cb',
+ 'info_dict': {
+ 'id': '20130909',
+ 'ext': 'mp4',
+ 'title': 'Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren',
+ 'description': 'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ video_file = self._search_regex(
+ r'file: "...:(.*?)",', webpage, 'video file')
+ title = self._html_search_regex(
+ r'<meta name="title" content="([^"]*?) - [^-]*? - [^-]*?"',
+ webpage, 'video title')
+ description = self._html_search_regex(
+ r'<meta name="description" content="([^"]*)"/>',
+ webpage, 'video description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ video_url = 'http://media.sztv.hu/vod/' + video_file
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/tagesschau.py b/hypervideo_dl/extractor/tagesschau.py
new file mode 100644
index 0000000..8ceab7e
--- /dev/null
+++ b/hypervideo_dl/extractor/tagesschau.py
@@ -0,0 +1,311 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ js_to_json,
+ parse_iso8601,
+ parse_filesize,
+)
+
+
+class TagesschauPlayerIE(InfoExtractor):
+ IE_NAME = 'tagesschau:player'
+ _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?P<kind>audio|video)/(?P=kind)-(?P<id>\d+)~player(?:_[^/?#&]+)?\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.tagesschau.de/multimedia/video/video-179517~player.html',
+ 'md5': '8d09548d5c15debad38bee3a4d15ca21',
+ 'info_dict': {
+ 'id': '179517',
+ 'ext': 'mp4',
+ 'title': 'Marie Kristin Boese, ARD Berlin, über den zukünftigen Kurs der AfD',
+ 'thumbnail': r're:^https?:.*\.jpg$',
+ 'formats': 'mincount:6',
+ },
+ }, {
+ 'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
+ 'md5': '76e6eec6ebd40740671cf0a2c88617e5',
+ 'info_dict': {
+ 'id': '29417',
+ 'ext': 'mp3',
+ 'title': 'Trabi - Bye, bye Rennpappe',
+ 'thumbnail': r're:^https?:.*\.jpg$',
+ 'formats': 'mincount:2',
+ },
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417~player_autoplay-true.html',
+ 'only_matching': True,
+ }]
+
+ _FORMATS = {
+ 'xs': {'quality': 0},
+ 's': {'width': 320, 'height': 180, 'quality': 1},
+ 'm': {'width': 512, 'height': 288, 'quality': 2},
+ 'l': {'width': 960, 'height': 540, 'quality': 3},
+ 'xl': {'width': 1280, 'height': 720, 'quality': 4},
+ 'xxl': {'quality': 5},
+ }
+
+ def _extract_via_api(self, kind, video_id):
+ info = self._download_json(
+ 'https://www.tagesschau.de/api/multimedia/{0}/{0}-{1}.json'.format(kind, video_id),
+ video_id)
+ title = info['headline']
+ formats = []
+ for media in info['mediadata']:
+ for format_id, format_url in media.items():
+ if determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls'))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'vcodec': 'none' if kind == 'audio' else None,
+ })
+ self._sort_formats(formats)
+ timestamp = parse_iso8601(info.get('date'))
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ # kind = mobj.group('kind').lower()
+ # if kind == 'video':
+ # return self._extract_via_api(kind, video_id)
+
+        # The JSON API does not provide some audio formats (e.g. Ogg), so
+        # the audio is extracted from the webpage instead
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(webpage).strip()
+ formats = []
+
+ for media_json in re.findall(r'({src\s*:\s*["\']http[^}]+type\s*:[^}]+})', webpage):
+ media = self._parse_json(js_to_json(media_json), video_id, fatal=False)
+ if not media:
+ continue
+ src = media.get('src')
+ if not src:
+                continue
+ quality = media.get('quality')
+ kind = media.get('type', '').split('/')[0]
+ ext = determine_ext(src)
+ f = {
+ 'url': src,
+ 'format_id': '%s_%s' % (quality, ext) if quality else ext,
+ 'ext': ext,
+ 'vcodec': 'none' if kind == 'audio' else None,
+ }
+ f.update(self._FORMATS.get(quality, {}))
+ formats.append(f)
+
+ self._sort_formats(formats)
+
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
+
+
+class TagesschauIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
+ 'md5': 'f7c27a0eff3bfe8c7727e65f8fe1b1e6',
+ 'info_dict': {
+ 'id': 'video-102143',
+ 'ext': 'mp4',
+ 'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
+ 'description': '18.07.2015 20:10 Uhr',
+ 'thumbnail': r're:^https?:.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
+ 'md5': '3c54c1f6243d279b706bde660ceec633',
+ 'info_dict': {
+ 'id': 'ts-5727',
+ 'ext': 'mp4',
+ 'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
+ 'description': 'md5:695c01bfd98b7e313c501386327aea59',
+ 'thumbnail': r're:^https?:.*\.jpg$',
+ },
+ }, {
+ # exclusive audio
+ 'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
+ 'md5': '76e6eec6ebd40740671cf0a2c88617e5',
+ 'info_dict': {
+ 'id': 'audio-29417',
+ 'ext': 'mp3',
+ 'title': 'Trabi - Bye, bye Rennpappe',
+ 'description': 'md5:8687dda862cbbe2cfb2df09b56341317',
+ 'thumbnail': r're:^https?:.*\.jpg$',
+ },
+ }, {
+ # audio in article
+ 'url': 'http://www.tagesschau.de/inland/bnd-303.html',
+ 'md5': 'e0916c623e85fc1d2b26b78f299d3958',
+ 'info_dict': {
+ 'id': 'bnd-303',
+ 'ext': 'mp3',
+ 'title': 'Viele Baustellen für neuen BND-Chef',
+ 'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4',
+ 'thumbnail': r're:^https?:.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
+ 'info_dict': {
+ 'id': 'afd-parteitag-135',
+ 'title': 'Möchtegern-Underdog mit Machtanspruch',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/sendung/tt-3827.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/sendung/nm-3475.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/sendung/weltspiegel-3167.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/tsvorzwanzig-959.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/sendung/bab/bab-3299~_bab-sendung-209.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tagesschau.de/100sekunden/index.html',
+ 'only_matching': True,
+ }, {
+ # playlist article with collapsing sections
+ 'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if TagesschauPlayerIE.suitable(url) else super(TagesschauIE, cls).suitable(url)
+
+ def _extract_formats(self, download_text, media_kind):
+ links = re.finditer(
+ r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
+ download_text)
+ formats = []
+        for link in links:
+            link_url = link.group('url')
+            if not link_url:
+                continue
+            format_id = self._search_regex(
+                r'.*/[^/.]+\.([^/]+)\.[^/.]+$', link_url, 'format ID',
+                default=determine_ext(link_url))
+            format = {
+                'format_id': format_id,
+                'url': link_url,
+                'format_name': link.group('name'),
+            }
+            title = link.group('title')
+ if title:
+ if media_kind.lower() == 'video':
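+                    # the title attribute packs metadata into &#10;-separated
+                    # lines, roughly (values assumed for illustration):
+                    #   Video: H.264&#10;960x540px&#10;1024kbps&#10;
+                    #   Audio: 128kbps, stereo&#10;Gr&ouml;&szlig;e: 250 MB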
+ m = re.match(
+ r'''(?x)
+ Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
+ (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
+ (?P<vbr>[0-9]+)kbps&\#10;
+ Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
+ Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
+ title)
+ if m:
+ format.update({
+ 'format_note': m.group('audio_desc'),
+ 'vcodec': m.group('vcodec'),
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ 'abr': int(m.group('abr')),
+ 'vbr': int(m.group('vbr')),
+ 'filesize_approx': parse_filesize(m.group('filesize_approx')),
+ })
+ else:
+ m = re.match(
+ r'(?P<format>.+?)-Format\s*:\s*(?P<abr>\d+)kbps\s*,\s*(?P<note>.+)',
+ title)
+ if m:
+ format.update({
+ 'format_note': '%s, %s' % (m.group('format'), m.group('note')),
+ 'vcodec': 'none',
+ 'abr': int(m.group('abr')),
+ })
+ formats.append(format)
+ self._sort_formats(formats)
+ return formats
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('path')
+ display_id = video_id.lstrip('-')
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._html_search_regex(
+ r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
+ webpage, 'title', default=None) or self._og_search_title(webpage)
+
+ DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses (?P<kind>Video|Audio) in folgenden Formaten zum Download an:</p>\s*<div class="controls">(?P<links>.*?)</div>\s*<p>'
+
+ webpage_type = self._og_search_property('type', webpage, default=None)
+ if webpage_type == 'website': # Article
+ entries = []
+ for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
+ r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
+ webpage), 1):
+ entries.append({
+ 'id': '%s-%d' % (display_id, num),
+                    'title': entry_title,
+ 'formats': self._extract_formats(download_text, media_kind),
+ })
+ if len(entries) > 1:
+ return self.playlist_result(entries, display_id, title)
+ formats = entries[0]['formats']
+ else: # Assume single video
+ download_text = self._search_regex(
+ DOWNLOAD_REGEX, webpage, 'download links', group='links')
+ media_kind = self._search_regex(
+ DOWNLOAD_REGEX, webpage, 'media kind', default='Video', group='kind')
+ formats = self._extract_formats(download_text, media_kind)
+ thumbnail = self._og_search_thumbnail(webpage)
+ description = self._html_search_regex(
+ r'(?s)<p class="teasertext">(.*?)</p>',
+ webpage, 'description', default=None)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': display_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ 'description': description,
+ }
diff --git a/hypervideo_dl/extractor/tass.py b/hypervideo_dl/extractor/tass.py
new file mode 100644
index 0000000..6d336da
--- /dev/null
+++ b/hypervideo_dl/extractor/tass.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ qualities,
+)
+
+
+class TassIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:tass\.ru|itar-tass\.com)/[^/]+/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'http://tass.ru/obschestvo/1586870',
+ 'md5': '3b4cdd011bc59174596b6145cda474a4',
+ 'info_dict': {
+ 'id': '1586870',
+ 'ext': 'mp4',
+ 'title': 'Посетителям московского зоопарка показали красную панду',
+ 'description': 'Приехавшую из Дублина Зейну можно увидеть в павильоне "Кошки тропиков"',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'http://itar-tass.com/obschestvo/1600009',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ sources = json.loads(js_to_json(self._search_regex(
+ r'(?s)sources\s*:\s*(\[.+?\])', webpage, 'sources')))
+
+ quality = qualities(['sd', 'hd'])
+
+ formats = []
+ for source in sources:
+ video_url = source.get('file')
+ if not video_url or not video_url.startswith('http') or not video_url.endswith('.mp4'):
+ continue
+ label = source.get('label')
+ formats.append({
+ 'url': video_url,
+ 'format_id': label,
+ 'quality': quality(label),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/tbs.py b/hypervideo_dl/extractor/tbs.py
new file mode 100644
index 0000000..e8a7c65
--- /dev/null
+++ b/hypervideo_dl/extractor/tbs.py
@@ -0,0 +1,89 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .turner import TurnerBaseIE
+from ..compat import (
+ compat_urllib_parse_urlparse,
+ compat_parse_qs,
+)
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ strip_or_none,
+)
+
+
+class TBSIE(TurnerBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))'
+ _TESTS = [{
+ 'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
+ 'info_dict': {
+ 'id': '8d384cde33b89f3a43ce5329de42903ed5099887',
+ 'ext': 'mp4',
+ 'title': 'Monster',
+ 'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.',
+ 'timestamp': 1508175329,
+ 'upload_date': '20171016',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ site, path, display_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, display_id)
+ drupal_settings = self._parse_json(self._search_regex(
+ r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
+ webpage, 'drupal setting'), display_id)
+ video_data = next(v for v in drupal_settings['turner_playlist'] if v.get('url') == path)
+
+ media_id = video_data['mediaID']
+ title = video_data['title']
+ tokenizer_query = compat_parse_qs(compat_urllib_parse_urlparse(
+ drupal_settings['ngtv_token_url']).query)
+
+ info = self._extract_ngtv_info(
+ media_id, tokenizer_query, {
+ 'url': url,
+ 'site_name': site[:3].upper(),
+ 'auth_required': video_data.get('authRequired') == '1',
+ })
+
+ thumbnails = []
+ for image_id, image in video_data.get('images', {}).items():
+ image_url = image.get('url')
+ if not image_url or image.get('type') != 'video':
+ continue
+ i = {
+ 'id': image_id,
+ 'url': image_url,
+ }
+ mobj = re.search(r'(\d+)x(\d+)', image_url)
+ if mobj:
+ i.update({
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ })
+ thumbnails.append(i)
+
+ info.update({
+ 'id': media_id,
+ 'title': title,
+ 'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
+ 'duration': float_or_none(video_data.get('duration')) or info.get('duration'),
+ 'timestamp': int_or_none(video_data.get('created')),
+ 'season_number': int_or_none(video_data.get('season')),
+ 'episode_number': int_or_none(video_data.get('episode')),
+ 'thumbnails': thumbnails,
+ })
+ return info
diff --git a/hypervideo_dl/extractor/tdslifeway.py b/hypervideo_dl/extractor/tdslifeway.py
new file mode 100644
index 0000000..101c6ee
--- /dev/null
+++ b/hypervideo_dl/extractor/tdslifeway.py
@@ -0,0 +1,33 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TDSLifewayIE(InfoExtractor):
+ _VALID_URL = r'https?://tds\.lifeway\.com/v1/trainingdeliverysystem/courses/(?P<id>\d+)/index\.html'
+
+ _TEST = {
+ # From http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers
+ 'url': 'http://tds.lifeway.com/v1/trainingdeliverysystem/courses/3453494717001/index.html?externalRegistration=AssetId%7C34F466F1-78F3-4619-B2AB-A8EFFA55E9E9%21InstanceId%7C0%21UserId%7Caaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa&grouping=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&activity_id=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&content_endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2Fcontent%2F&actor=%7B%22name%22%3A%5B%22Guest%20Guest%22%5D%2C%22account%22%3A%5B%7B%22accountServiceHomePage%22%3A%22http%3A%2F%2Fscorm.lifeway.com%2F%22%2C%22accountName%22%3A%22aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa%22%7D%5D%2C%22objectType%22%3A%22Agent%22%7D&content_token=462a50b2-b6f9-4970-99b1-930882c499fb&registration=93d6ec8e-7f7b-4ed3-bbc8-a857913c0b2a&externalConfiguration=access%7CFREE%21adLength%7C-1%21assignOrgId%7C4AE36F78-299A-425D-91EF-E14A899B725F%21assignOrgParentId%7C%21courseId%7C%21isAnonymous%7Cfalse%21previewAsset%7Cfalse%21previewLength%7C-1%21previewMode%7Cfalse%21royalty%7CFREE%21sessionId%7C671422F9-8E79-48D4-9C2C-4EE6111EA1CD%21trackId%7C&auth=Basic%20OjhmZjk5MDBmLTBlYTMtNDJhYS04YjFlLWE4MWQ3NGNkOGRjYw%3D%3D&endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2F',
+ 'info_dict': {
+ 'id': '3453494717001',
+ 'ext': 'mp4',
+ 'title': 'The Gospel by Numbers',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20140410',
+ 'description': 'Coming soon from T4G 2014!',
+ 'uploader_id': '2034960640001',
+ 'timestamp': 1397145591,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }
+
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2034960640001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ brightcove_id = self._match_id(url)
+ return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
diff --git a/hypervideo_dl/extractor/teachable.py b/hypervideo_dl/extractor/teachable.py
new file mode 100644
index 0000000..2394f86
--- /dev/null
+++ b/hypervideo_dl/extractor/teachable.py
@@ -0,0 +1,298 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .wistia import WistiaIE
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ get_element_by_class,
+ strip_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class TeachableBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'teachable'
+ _URL_PREFIX = 'teachable:'
+
+ _SITES = {
+ # Only notable ones here
+ 'v1.upskillcourses.com': 'upskill',
+ 'gns3.teachable.com': 'gns3',
+ 'academyhacker.com': 'academyhacker',
+ 'stackskills.com': 'stackskills',
+ 'market.saleshacker.com': 'saleshacker',
+ 'learnability.org': 'learnability',
+ 'edurila.com': 'edurila',
+ 'courses.workitdaily.com': 'workitdaily',
+ }
+
+ _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))
+
+ def _real_initialize(self):
+ self._logged_in = False
+
+ def _login(self, site):
+ if self._logged_in:
+ return
+
+ username, password = self._get_login_info(
+ netrc_machine=self._SITES.get(site, site))
+ if username is None:
+ return
+
+ login_page, urlh = self._download_webpage_handle(
+ 'https://%s/sign_in' % site, None,
+ 'Downloading %s login page' % site)
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'class=["\']user-signout',
+ r'<a[^>]+\bhref=["\']/sign_out',
+ r'Log\s+[Oo]ut\s*<'))
+
+ if is_logged(login_page):
+ self._logged_in = True
+ return
+
+ login_url = urlh.geturl()
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'user[email]': username,
+ 'user[password]': password,
+ })
+
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>(?:(?!\1).)+)\1', login_page,
+ 'post url', default=login_url, group='url')
+
+ if not post_url.startswith('http'):
+ post_url = urljoin(login_url, post_url)
+
+ response = self._download_webpage(
+ post_url, None, 'Logging in to %s' % site,
+ data=urlencode_postdata(login_form),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': login_url,
+ })
+
+ if '>I accept the new Privacy Policy<' in response:
+ raise ExtractorError(
+                'Unable to log in: %s asks you to accept the new Privacy Policy. '
+ 'Go to https://%s/ and accept.' % (site, site), expected=True)
+
+ # Successful login
+ if is_logged(response):
+ self._logged_in = True
+ return
+
+ message = get_element_by_class('alert', response)
+ if message is not None:
+ raise ExtractorError(
+                'Unable to log in: %s' % clean_html(message), expected=True)
+
+ raise ExtractorError('Unable to log in')
+
+
+class TeachableIE(TeachableBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ %shttps?://(?P<site_t>[^/]+)|
+ https?://(?:www\.)?(?P<site>%s)
+ )
+ /courses/[^/]+/lectures/(?P<id>\d+)
+ ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
+
+ _TESTS = [{
+ 'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364',
+ 'info_dict': {
+ 'id': 'untlgzk1v7',
+ 'ext': 'bin',
+ 'title': 'Overview',
+ 'description': 'md5:071463ff08b86c208811130ea1c2464c',
+ 'duration': 736.4,
+ 'timestamp': 1542315762,
+ 'upload_date': '20181115',
+ 'chapter': 'Welcome',
+ 'chapter_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gns3.teachable.com/courses/423415/lectures/6885939',
+ 'only_matching': True,
+ }, {
+ 'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _is_teachable(webpage):
+ return 'teachableTracker.linker:autoLink' in webpage and re.search(
+ r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com',
+ webpage)
+
+ @staticmethod
+ def _extract_url(webpage, source_url):
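+ # Used by the generic extractor to hand off pages hosted on custom Teachable domains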
+ if not TeachableIE._is_teachable(webpage):
+ return
+ if re.match(r'https?://[^/]+/(?:courses|p)', source_url):
+ return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ site = mobj.group('site') or mobj.group('site_t')
+ video_id = mobj.group('id')
+
+ self._login(site)
+
+ prefixed = url.startswith(self._URL_PREFIX)
+ if prefixed:
+ url = url[len(self._URL_PREFIX):]
+
+ webpage = self._download_webpage(url, video_id)
+
+ wistia_urls = WistiaIE._extract_urls(webpage)
+ if not wistia_urls:
+ if any(re.search(p, webpage) for p in (
+ r'class=["\']lecture-contents-locked',
+ r'>\s*Lecture contents locked',
+ r'id=["\']lecture-locked',
+ # https://academy.tailoredtutors.co.uk/courses/108779/lectures/1955313
+ r'class=["\'](?:inner-)?lesson-locked',
+ r'>LESSON LOCKED<')):
+ self.raise_login_required('Lecture contents locked')
+ raise ExtractorError('Unable to find video URL')
+
+ title = self._og_search_title(webpage, default=None)
+
+ chapter = None
+ chapter_number = None
+ section_item = self._search_regex(
+ r'(?s)(?P<li><li[^>]+\bdata-lecture-id=["\']%s[^>]+>.+?</li>)' % video_id,
+ webpage, 'section item', default=None, group='li')
+ if section_item:
+ chapter_number = int_or_none(self._search_regex(
+ r'data-ss-position=["\'](\d+)', section_item, 'section id',
+ default=None))
+ if chapter_number is not None:
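+ # Recover the chapter title by position from the ordered section headings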
+ sections = []
+ for s in re.findall(
+ r'(?s)<div[^>]+\bclass=["\']section-title[^>]+>(.+?)</div>', webpage):
+ section = strip_or_none(clean_html(s))
+ if not section:
+ sections = []
+ break
+ sections.append(section)
+ if chapter_number <= len(sections):
+ chapter = sections[chapter_number - 1]
+
+ entries = [{
+ '_type': 'url_transparent',
+ 'url': wistia_url,
+ 'ie_key': WistiaIE.ie_key(),
+ 'title': title,
+ 'chapter': chapter,
+ 'chapter_number': chapter_number,
+ } for wistia_url in wistia_urls]
+
+ return self.playlist_result(entries, video_id, title)
+
+
+class TeachableCourseIE(TeachableBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ %shttps?://(?P<site_t>[^/]+)|
+ https?://(?:www\.)?(?P<site>%s)
+ )
+ /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
+ ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
+ _TESTS = [{
+ 'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/',
+ 'info_dict': {
+ 'id': 'essential-web-developer-course',
+ 'title': 'The Essential Web Developer Course (Free)',
+ },
+ 'playlist_count': 192,
+ }, {
+ 'url': 'http://v1.upskillcourses.com/courses/119763/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://v1.upskillcourses.com/courses/enrolled/119763',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gns3.teachable.com/courses/enrolled/423415',
+ 'only_matching': True,
+ }, {
+ 'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
+ 'only_matching': True,
+ }, {
+ 'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if TeachableIE.suitable(url) else super(
+ TeachableCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ site = mobj.group('site') or mobj.group('site_t')
+ course_id = mobj.group('id')
+
+ self._login(site)
+
+ prefixed = url.startswith(self._URL_PREFIX)
+ if prefixed:
+ prefix = self._URL_PREFIX
+ url = url[len(prefix):]
+
+ webpage = self._download_webpage(url, course_id)
+
+ url_base = 'https://%s/' % site
+
+ entries = []
+
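+ # Each syllabus row is a <li> with class section-item; rows lacking a play icon or a duration are not lectures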
+ for mobj in re.finditer(
+ r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
+ webpage):
+ li = mobj.group('li')
+ if 'fa-youtube-play' not in li and not re.search(r'\d{1,2}:\d{2}', li):
+ continue
+ lecture_url = self._search_regex(
+ r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,
+ 'lecture url', default=None, group='url')
+ if not lecture_url:
+ continue
+ lecture_id = self._search_regex(
+ r'/lectures/(\d+)', lecture_url, 'lecture id', default=None)
+ title = self._html_search_regex(
+ r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
+ 'title', default=None)
+ entry_url = urljoin(url_base, lecture_url)
+ if prefixed:
+ entry_url = self._URL_PREFIX + entry_url
+ entries.append(
+ self.url_result(
+ entry_url,
+ ie=TeachableIE.ie_key(), video_id=lecture_id,
+ video_title=clean_html(title)))
+
+ course_title = self._html_search_regex(
+ (r'(?s)<img[^>]+class=["\']course-image[^>]+>\s*<h\d>(.+?)</h',
+ r'(?s)<h\d[^>]+class=["\']course-title[^>]+>(.+?)</h'),
+ webpage, 'course title', fatal=False)
+
+ return self.playlist_result(entries, course_id, course_title)
diff --git a/hypervideo_dl/extractor/teachertube.py b/hypervideo_dl/extractor/teachertube.py
new file mode 100644
index 0000000..1272078
--- /dev/null
+++ b/hypervideo_dl/extractor/teachertube.py
@@ -0,0 +1,129 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ qualities,
+)
+
+
+class TeacherTubeIE(InfoExtractor):
+ IE_NAME = 'teachertube'
+ IE_DESC = 'teachertube.com videos'
+
+ _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P<id>\d+)'
+
+ _TESTS = [{
+ # flowplayer
+ 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
+ 'md5': 'f9434ef992fd65936d72999951ee254c',
+ 'info_dict': {
+ 'id': '339997',
+ 'ext': 'mp4',
+ 'title': 'Measures of dispersion from a frequency table',
+ 'description': 'Measures of dispersion from a frequency table',
+ 'thumbnail': r're:https?://.*\.(?:jpg|png)',
+ },
+ }, {
+ # jwplayer
+ 'url': 'http://www.teachertube.com/music.php?music_id=8805',
+ 'md5': '01e8352006c65757caf7b961f6050e21',
+ 'info_dict': {
+ 'id': '8805',
+ 'ext': 'mp3',
+ 'title': 'PER ASPERA AD ASTRA',
+ 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNIČKE ŠKOLE P',
+ },
+ }, {
+ # unavailable video
+ 'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ error = self._search_regex(
+ r'<div\b[^>]+\bclass=["\']msgBox error[^>]+>([^<]+)', webpage,
+ 'error', default=None)
+ if error:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ title = self._html_search_meta('title', webpage, 'title', fatal=True)
+ TITLE_SUFFIX = ' - TeacherTube'
+ if title.endswith(TITLE_SUFFIX):
+ title = title[:-len(TITLE_SUFFIX)].strip()
+
+ description = self._html_search_meta('description', webpage, 'description')
+ if description:
+ description = description.strip()
+
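+ # Prefer mp4 over flv over mp3 when several sources are present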
+ quality = qualities(['mp3', 'flv', 'mp4'])
+
+ media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage)
+ media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage))
+ media_urls.extend(re.findall(r'\'file\'\s*:\s*["\']([^"\']+)["\'],', webpage))
+
+ formats = [
+ {
+ 'url': media_url,
+ 'quality': quality(determine_ext(media_url))
+ } for media_url in set(media_urls)
+ ]
+
+ self._sort_formats(formats)
+
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._html_search_meta(
+ 'thumbnail', webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
+
+
+class TeacherTubeUserIE(InfoExtractor):
+ IE_NAME = 'teachertube:user:collection'
+ IE_DESC = 'teachertube.com user and collection videos'
+
+ _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'
+
+ _MEDIA_RE = r'''(?sx)
+ class="?sidebar_thumb_time"?>[0-9:]+</div>
+ \s*
+ <a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
+ '''
+ _TEST = {
+ 'url': 'http://www.teachertube.com/user/profile/rbhagwati2',
+ 'info_dict': {
+ 'id': 'rbhagwati2',
+ },
+ 'playlist_mincount': 179,
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ user_id = mobj.group('user')
+
+ urls = []
+ webpage = self._download_webpage(url, user_id)
+ urls.extend(re.findall(self._MEDIA_RE, webpage))
+
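+ # The first page of results is embedded in the profile page; further pages come from the AJAX listing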
+ pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1]
+ for p in pages:
+ more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
+ webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages)))
+ video_urls = re.findall(self._MEDIA_RE, webpage)
+ urls.extend(video_urls)
+
+ entries = [self.url_result(vurl, 'TeacherTube') for vurl in urls]
+ return self.playlist_result(entries, user_id)
diff --git a/hypervideo_dl/extractor/teachingchannel.py b/hypervideo_dl/extractor/teachingchannel.py
new file mode 100644
index 0000000..624cdb3
--- /dev/null
+++ b/hypervideo_dl/extractor/teachingchannel.py
@@ -0,0 +1,33 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TeachingChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos?/(?P<id>[^/?&#]+)'
+
+ _TEST = {
+ 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
+ 'info_dict': {
+ 'id': '3swwlzkT',
+ 'ext': 'mp4',
+ 'title': 'A History of Teaming',
+ 'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
+ 'duration': 422,
+ 'upload_date': '20170316',
+ 'timestamp': 1489691297,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['JWPlatform'],
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ mid = self._search_regex(
+ r'(?:data-mid=["\']|id=["\']jw-video-player-)([a-zA-Z0-9]{8})',
+ webpage, 'media id')
+
+ return self.url_result('jwplatform:' + mid, 'JWPlatform', mid)
diff --git a/hypervideo_dl/extractor/teamcoco.py b/hypervideo_dl/extractor/teamcoco.py
new file mode 100644
index 0000000..5793b71
--- /dev/null
+++ b/hypervideo_dl/extractor/teamcoco.py
@@ -0,0 +1,205 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .turner import TurnerBaseIE
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ mimetype2ext,
+ parse_duration,
+ parse_iso8601,
+ qualities,
+)
+
+
+class TeamcocoIE(TurnerBaseIE):
+ _VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
+ _TESTS = [
+ {
+ 'url': 'http://teamcoco.com/video/mary-kay-remote',
+ 'md5': '55d532f81992f5c92046ad02fec34d7d',
+ 'info_dict': {
+ 'id': '80187',
+ 'ext': 'mp4',
+ 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
+ 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
+ 'duration': 495.0,
+ 'upload_date': '20140402',
+ 'timestamp': 1396407600,
+ }
+ }, {
+ 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
+ 'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
+ 'info_dict': {
+ 'id': '19705',
+ 'ext': 'mp4',
+ 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
+ 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
+ 'duration': 288,
+ 'upload_date': '20111104',
+ 'timestamp': 1320405840,
+ }
+ }, {
+ 'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
+ 'info_dict': {
+ 'id': '88748',
+ 'ext': 'mp4',
+ 'title': 'Timothy Olyphant Raises A Toast To “Justified”',
+ 'description': 'md5:15501f23f020e793aeca761205e42c24',
+ 'upload_date': '20150415',
+ 'timestamp': 1429088400,
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 downloads
+ }
+ }, {
+ 'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',
+ 'info_dict': {
+ 'id': '89341',
+ 'ext': 'mp4',
+ 'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
+ 'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 downloads
+ },
+ 'skip': 'This video is no longer available.',
+ }, {
+ 'url': 'http://teamcoco.com/video/the-conan-audiencey-awards-for-04/25/18',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://teamcoco.com/italy/conan-jordan-schlansky-hit-the-streets-of-florence',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://teamcoco.com/haiti/conan-s-haitian-history-lesson',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft',
+ 'only_matching': True,
+ }
+ ]
+ _RECORD_TEMPL = '''id
+ title
+ teaser
+ publishOn
+ thumb {
+ preview
+ }
+ tags {
+ name
+ }
+ duration
+ turnerMediaId
+ turnerMediaAuthToken'''
+
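+ # Teamcoco metadata lives behind a GraphQL endpoint; slug and record lookups share this helper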
+ def _graphql_call(self, query_template, object_type, object_id):
+ find_object = 'find' + object_type
+ return self._download_json(
+ 'https://teamcoco.com/graphql', object_id, data=json.dumps({
+ 'query': query_template % (find_object, object_id)
+ }).encode(), headers={
+ 'Content-Type': 'application/json',
+ })['data'][find_object]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ response = self._graphql_call('''{
+ %%s(slug: "%%s") {
+ ... on RecordSlug {
+ record {
+ %s
+ }
+ }
+ ... on PageSlug {
+ child {
+ id
+ }
+ }
+ ... on NotFoundSlug {
+ status
+ }
+ }
+}''' % self._RECORD_TEMPL, 'Slug', display_id)
+ if response.get('status'):
+ raise ExtractorError('This video is no longer available.', expected=True)
+
+ child = response.get('child')
+ if child:
+ record = self._graphql_call('''{
+ %%s(id: "%%s") {
+ ... on Video {
+ %s
+ }
+ }
+}''' % self._RECORD_TEMPL, 'Record', child['id'])
+ else:
+ record = response['record']
+ video_id = record['id']
+
+ info = {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': record['title'],
+ 'thumbnail': record.get('thumb', {}).get('preview'),
+ 'description': record.get('teaser'),
+ 'duration': parse_duration(record.get('duration')),
+ 'timestamp': parse_iso8601(record.get('publishOn')),
+ }
+
+ media_id = record.get('turnerMediaId')
+ if media_id:
+ self._initialize_geo_bypass({
+ 'countries': ['US'],
+ })
+ info.update(self._extract_ngtv_info(media_id, {
+ 'accessToken': record['turnerMediaAuthToken'],
+ 'accessTokenType': 'jws',
+ }))
+ else:
+ video_sources = self._download_json(
+ 'https://teamcoco.com/_truman/d/' + video_id,
+ video_id)['meta']['src']
+ if isinstance(video_sources, dict):
+ video_sources = video_sources.values()
+
+ formats = []
+ get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
+ for src in video_sources:
+ if not isinstance(src, dict):
+ continue
+ src_url = src.get('src')
+ if not src_url:
+ continue
+ format_id = src.get('label')
+ ext = determine_ext(src_url, mimetype2ext(src.get('type')))
+ if format_id == 'hls' or ext == 'm3u8':
+ # compat_urllib_parse.urljoin does not work here
+ if src_url.startswith('/'):
+ src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
+ formats.extend(self._extract_m3u8_formats(
+ src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+ else:
+ if src_url.startswith('/mp4:protected/'):
+ # TODO Correct extraction for these files
+ continue
+ tbr = int_or_none(self._search_regex(
+ r'(\d+)k\.mp4', src_url, 'tbr', default=None))
+
+ formats.append({
+ 'url': src_url,
+ 'ext': ext,
+ 'tbr': tbr,
+ 'format_id': format_id,
+ 'quality': get_quality(format_id),
+ })
+ self._sort_formats(formats)
+ info['formats'] = formats
+
+ return info
diff --git a/hypervideo_dl/extractor/teamtreehouse.py b/hypervideo_dl/extractor/teamtreehouse.py
new file mode 100644
index 0000000..d347e97
--- /dev/null
+++ b/hypervideo_dl/extractor/teamtreehouse.py
@@ -0,0 +1,140 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ get_element_by_class,
+ get_element_by_id,
+ parse_duration,
+ remove_end,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class TeamTreeHouseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?teamtreehouse\.com/library/(?P<id>[^/]+)'
+ _TESTS = [{
+ # Course
+ 'url': 'https://teamtreehouse.com/library/introduction-to-user-authentication-in-php',
+ 'info_dict': {
+ 'id': 'introduction-to-user-authentication-in-php',
+ 'title': 'Introduction to User Authentication in PHP',
+ 'description': 'md5:405d7b4287a159b27ddf30ca72b5b053',
+ },
+ 'playlist_mincount': 24,
+ }, {
+ # WorkShop
+ 'url': 'https://teamtreehouse.com/library/deploying-a-react-app',
+ 'info_dict': {
+ 'id': 'deploying-a-react-app',
+ 'title': 'Deploying a React App',
+ 'description': 'md5:10a82e3ddff18c14ac13581c9b8e5921',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ # Video
+ 'url': 'https://teamtreehouse.com/library/application-overview-2',
+ 'info_dict': {
+ 'id': 'application-overview-2',
+ 'ext': 'mp4',
+ 'title': 'Application Overview',
+ 'description': 'md5:4b0a234385c27140a4378de5f1e15127',
+ },
+ 'expected_warnings': ['This is just a preview'],
+ }]
+ _NETRC_MACHINE = 'teamtreehouse'
+
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
+ signin_page = self._download_webpage(
+ 'https://teamtreehouse.com/signin',
+ None, 'Downloading signin page')
+ data = self._form_hidden_inputs('new_user_session', signin_page)
+ data.update({
+ 'user_session[email]': email,
+ 'user_session[password]': password,
+ })
+ error_message = get_element_by_class('error-message', self._download_webpage(
+ 'https://teamtreehouse.com/person_session',
+ None, 'Logging in', data=urlencode_postdata(data)))
+ if error_message:
+ raise ExtractorError(clean_html(error_message), expected=True)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
+ description = self._html_search_meta(
+ ['description', 'og:description', 'twitter:description'], webpage)
+ entries = self._parse_html5_media_entries(url, webpage, display_id)
+ if entries:
+ info = entries[0]
+
+ for subtitles in info.get('subtitles', {}).values():
+ for subtitle in subtitles:
+ subtitle['ext'] = determine_ext(subtitle['url'], 'srt')
+
+ is_preview = 'data-preview="true"' in webpage
+ if is_preview:
+ self.report_warning(
+ 'This is just a preview. You need to be signed in with a Basic account to download the entire video.', display_id)
+ duration = 30
+ else:
+ duration = float_or_none(self._search_regex(
+ r'data-duration="(\d+)"', webpage, 'duration',
+ default=None), 1000)
+ if not duration:
+ duration = parse_duration(get_element_by_id(
+ 'video-duration', webpage))
+
+ info.update({
+ 'id': display_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ })
+ return info
+ else:
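+ # Course/workshop pages: collect lecture links as url_transparent entries, optionally tagged with chapter info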
+ def extract_urls(html, extract_info=None):
+ for path in re.findall(r'<a[^>]+href="([^"]+)"', html):
+ page_url = urljoin(url, path)
+ entry = {
+ '_type': 'url_transparent',
+ 'id': self._match_id(page_url),
+ 'url': page_url,
+ 'ie_key': self.ie_key(),
+ }
+ if extract_info:
+ entry.update(extract_info)
+ entries.append(entry)
+
+ workshop_videos = self._search_regex(
+ r'(?s)<ul[^>]+id="workshop-videos"[^>]*>(.+?)</ul>',
+ webpage, 'workshop videos', default=None)
+ if workshop_videos:
+ extract_urls(workshop_videos)
+ else:
+ stages_path = self._search_regex(
+ r'(?s)<div[^>]+id="syllabus-stages"[^>]+data-url="([^"]+)"',
+ webpage, 'stages path')
+ if stages_path:
+ stages_page = self._download_webpage(
+ urljoin(url, stages_path), display_id, 'Downloading stages page')
+ for chapter_number, (chapter, steps_list) in enumerate(re.findall(r'(?s)<h2[^>]*>\s*(.+?)\s*</h2>.+?<ul[^>]*>(.+?)</ul>', stages_page), 1):
+ extract_urls(steps_list, {
+ 'chapter': chapter,
+ 'chapter_number': chapter_number,
+ })
+ title = remove_end(title, ' Course')
+
+ return self.playlist_result(
+ entries, display_id, title, description)
diff --git a/hypervideo_dl/extractor/techtalks.py b/hypervideo_dl/extractor/techtalks.py
new file mode 100644
index 0000000..a5b62c7
--- /dev/null
+++ b/hypervideo_dl/extractor/techtalks.py
@@ -0,0 +1,82 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ get_element_by_attribute,
+ clean_html,
+)
+
+
+class TechTalksIE(InfoExtractor):
+ _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]+/)?(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
+ 'info_dict': {
+ 'id': '57758',
+ 'title': 'Learning Topic Models --- Going beyond SVD',
+ },
+ 'playlist': [
+ {
+ 'info_dict': {
+ 'id': '57758',
+ 'ext': 'flv',
+ 'title': 'Learning Topic Models --- Going beyond SVD',
+ },
+ },
+ {
+ 'info_dict': {
+ 'id': '57758-slides',
+ 'ext': 'flv',
+ 'title': 'Learning Topic Models --- Going beyond SVD',
+ },
+ },
+ ],
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://techtalks.tv/talks/57758',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ talk_id = mobj.group('id')
+ webpage = self._download_webpage(url, talk_id)
+ rtmp_url = self._search_regex(
+ r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
+ play_path = self._search_regex(
+ r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
+ webpage, 'presenter play path')
+ title = clean_html(get_element_by_attribute('class', 'title', webpage))
+ video_info = {
+ 'id': talk_id,
+ 'title': title,
+ 'url': rtmp_url,
+ 'play_path': play_path,
+ 'ext': 'flv',
+ }
+ m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
+ if m_slides is None:
+ return video_info
+ else:
+ return {
+ '_type': 'playlist',
+ 'id': talk_id,
+ 'title': title,
+ 'entries': [
+ video_info,
+ # The slides video
+ {
+ 'id': talk_id + '-slides',
+ 'title': title,
+ 'url': rtmp_url,
+ 'play_path': m_slides.group(1),
+ 'ext': 'flv',
+ },
+ ],
+ }
diff --git a/hypervideo_dl/extractor/ted.py b/hypervideo_dl/extractor/ted.py
new file mode 100644
index 0000000..f09f1a3
--- /dev/null
+++ b/hypervideo_dl/extractor/ted.py
@@ -0,0 +1,367 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+
+from ..compat import (
+ compat_str,
+ compat_urlparse
+)
+from ..utils import (
+ extract_attributes,
+ float_or_none,
+ int_or_none,
+ try_get,
+ url_or_none,
+)
+
+
+class TEDIE(InfoExtractor):
+ IE_NAME = 'ted'
+ _VALID_URL = r'''(?x)
+ (?P<proto>https?://)
+ (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
+ (
+ (?P<type_playlist>playlists(?:/(?P<playlist_id>\d+))?) # We have a playlist
+ |
+ ((?P<type_talk>talks)) # We have a simple talk
+ |
+ (?P<type_watch>watch)/[^/]+/[^/]+
+ )
+ (/lang/(.*?))? # The url may contain the language
+ /(?P<name>[\w-]+) # Here goes the name and then ".html"
+ .*)$
+ '''
+ _TESTS = [{
+ 'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
+ 'md5': 'b0ce2b05ca215042124fbc9e3886493a',
+ 'info_dict': {
+ 'id': '102',
+ 'ext': 'mp4',
+ 'title': 'The illusion of consciousness',
+ 'description': ('Philosopher Dan Dennett makes a compelling '
+ 'argument that not only don\'t we understand our own '
+ 'consciousness, but that half the time our brains are '
+ 'actively fooling us.'),
+ 'uploader': 'Dan Dennett',
+ 'width': 853,
+ 'duration': 1308,
+ 'view_count': int,
+ 'comment_count': int,
+ 'tags': list,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # missing HTTP bitrates
+ 'url': 'https://www.ted.com/talks/vishal_sikka_the_beauty_and_power_of_algorithms',
+ 'info_dict': {
+ 'id': '6069',
+ 'ext': 'mp4',
+ 'title': 'The beauty and power of algorithms',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'description': 'md5:734e352710fb00d840ab87ae31aaf688',
+ 'uploader': 'Vishal Sikka',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
+ 'md5': 'e6b9617c01a7970ceac8bb2c92c346c0',
+ 'info_dict': {
+ 'id': '1972',
+ 'ext': 'mp4',
+ 'title': 'Be passionate. Be courageous. Be your best.',
+ 'uploader': 'Gabby Giffords and Mark Kelly',
+ 'description': 'md5:5174aed4d0f16021b704120360f72b92',
+ 'duration': 1128,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.ted.com/playlists/who_are_the_hackers',
+ 'info_dict': {
+ 'id': '10',
+ 'title': 'Who are the hackers?',
+ 'description': 'md5:49a0dbe8fb76d81a0e64b4a80af7f15a'
+ },
+ 'playlist_mincount': 6,
+ }, {
+ # contains a youtube video
+ 'url': 'https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything',
+ 'add_ie': ['Youtube'],
+ 'info_dict': {
+ 'id': '_ZG8HBuDjgc',
+ 'ext': 'webm',
+ 'title': 'Douglas Adams: Parrots the Universe and Everything',
+ 'description': 'md5:01ad1e199c49ac640cb1196c0e9016af',
+ 'uploader': 'University of California Television (UCTV)',
+ 'uploader_id': 'UCtelevision',
+ 'upload_date': '20080522',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # no nativeDownloads
+ 'url': 'https://www.ted.com/talks/tom_thum_the_orchestra_in_my_mouth',
+ 'info_dict': {
+ 'id': '1792',
+ 'ext': 'mp4',
+ 'title': 'The orchestra in my mouth',
+ 'description': 'md5:5d1d78650e2f8dfcbb8ebee2951ac29a',
+ 'uploader': 'Tom Thum',
+ 'view_count': int,
+ 'comment_count': int,
+ 'tags': list,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # with own formats and private Youtube external
+ 'url': 'https://www.ted.com/talks/spencer_wells_a_family_tree_for_humanity',
+ 'only_matching': True,
+ }]
+
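+ # Fixed dimensions of TED's progressive download renditions, keyed by quality label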
+ _NATIVE_FORMATS = {
+ 'low': {'width': 320, 'height': 180},
+ 'medium': {'width': 512, 'height': 288},
+ 'high': {'width': 854, 'height': 480},
+ }
+
+ def _extract_info(self, webpage):
+ info_json = self._search_regex(
+ r'(?s)q\(\s*"\w+.init"\s*,\s*({.+?})\)\s*</script>',
+ webpage, 'info json')
+ return json.loads(info_json)
+
+ def _real_extract(self, url):
+ m = re.match(self._VALID_URL, url, re.VERBOSE)
+ if m.group('type').startswith('embed'):
+ desktop_url = m.group('proto') + 'www' + m.group('urlmain')
+ return self.url_result(desktop_url, 'TED')
+ name = m.group('name')
+ if m.group('type_talk'):
+ return self._talk_info(url, name)
+ elif m.group('type_watch'):
+ return self._watch_info(url, name)
+ else:
+ return self._playlist_videos_info(url, name)
+
+ def _playlist_videos_info(self, url, name):
+ '''Returns the videos of the playlist'''
+
+ webpage = self._download_webpage(url, name,
+ 'Downloading playlist webpage')
+
+ playlist_entries = []
+ for entry in re.findall(r'(?s)<[^>]+data-ga-context=["\']playlist["\'][^>]*>', webpage):
+ attrs = extract_attributes(entry)
+ entry_url = compat_urlparse.urljoin(url, attrs['href'])
+ playlist_entries.append(self.url_result(entry_url, self.ie_key()))
+
+ final_url = self._og_search_url(webpage, fatal=False)
+ playlist_id = (
+ re.match(self._VALID_URL, final_url).group('playlist_id')
+ if final_url else None)
+
+ return self.playlist_result(
+ playlist_entries, playlist_id=playlist_id,
+ playlist_title=self._og_search_title(webpage, fatal=False),
+ playlist_description=self._og_search_description(webpage))
+
+ def _talk_info(self, url, video_name):
+ webpage = self._download_webpage(url, video_name)
+
+ info = self._extract_info(webpage)
+
+ data = try_get(info, lambda x: x['__INITIAL_DATA__'], dict) or info
+ talk_info = data['talks'][0]
+
+ title = talk_info['title'].strip()
+
+ downloads = talk_info.get('downloads') or {}
+ native_downloads = downloads.get('nativeDownloads') or talk_info.get('nativeDownloads') or {}
+
+ formats = [{
+ 'url': format_url,
+ 'format_id': format_id,
+ } for (format_id, format_url) in native_downloads.items() if format_url is not None]
+
+ subtitled_downloads = downloads.get('subtitledDownloads') or {}
+ for lang, subtitled_download in subtitled_downloads.items():
+ for q in self._NATIVE_FORMATS:
+ q_url = subtitled_download.get(q)
+ if not q_url:
+ continue
+ formats.append({
+ 'url': q_url,
+ 'format_id': '%s-%s' % (q, lang),
+ 'language': lang,
+ })
+
+ if formats:
+ for f in formats:
+ finfo = self._NATIVE_FORMATS.get(f['format_id'].split('-')[0])
+ if finfo:
+ f.update(finfo)
+
+ player_talk = talk_info['player_talks'][0]
+
+ resources_ = player_talk.get('resources') or talk_info.get('resources') or {}
+
+ http_url = None
+ for format_id, resources in resources_.items():
+ if format_id == 'hls':
+ if not isinstance(resources, dict):
+ continue
+ stream_url = url_or_none(resources.get('stream'))
+ if not stream_url:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_name, 'mp4', m3u8_id=format_id,
+ fatal=False))
+ else:
+ if not isinstance(resources, list):
+ continue
+ if format_id == 'h264':
+ for resource in resources:
+ h264_url = resource.get('file')
+ if not h264_url:
+ continue
+ bitrate = int_or_none(resource.get('bitrate'))
+ formats.append({
+ 'url': h264_url,
+ 'format_id': '%s-%sk' % (format_id, bitrate),
+ 'tbr': bitrate,
+ })
+ if re.search(r'\d+k', h264_url):
+ http_url = h264_url
+ elif format_id == 'rtmp':
+ streamer = talk_info.get('streamer')
+ if not streamer:
+ continue
+ for resource in resources:
+ formats.append({
+ 'format_id': '%s-%s' % (format_id, resource.get('name')),
+ 'url': streamer,
+ 'play_path': resource['file'],
+ 'ext': 'flv',
+ 'width': int_or_none(resource.get('width')),
+ 'height': int_or_none(resource.get('height')),
+ 'tbr': int_or_none(resource.get('bitrate')),
+ })
+
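+ # Derive progressive HTTP formats from the HLS variants: substitute each variant's bitrate into the known direct-download URL pattern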
+ m3u8_formats = list(filter(
+ lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
+ formats))
+ if http_url:
+ for m3u8_format in m3u8_formats:
+ bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
+ if not bitrate:
+ continue
+ bitrate_url = re.sub(r'\d+k', bitrate, http_url)
+ if not self._is_valid_url(
+ bitrate_url, video_name, '%s bitrate' % bitrate):
+ continue
+ f = m3u8_format.copy()
+ f.update({
+ 'url': bitrate_url,
+ 'format_id': m3u8_format['format_id'].replace('hls', 'http'),
+ 'protocol': 'http',
+ })
+ if f.get('acodec') == 'none':
+ del f['acodec']
+ formats.append(f)
+
+ audio_download = talk_info.get('audioDownload')
+ if audio_download:
+ formats.append({
+ 'url': audio_download,
+ 'format_id': 'audio',
+ 'vcodec': 'none',
+ })
+
+ if not formats:
+ external = player_talk.get('external')
+ if isinstance(external, dict):
+ service = external.get('service')
+ if isinstance(service, compat_str):
+ ext_url = None
+ if service.lower() == 'youtube':
+ ext_url = external.get('code')
+ return self.url_result(ext_url or external['uri'])
+
+ self._sort_formats(formats)
+
+ video_id = compat_str(talk_info['id'])
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'uploader': player_talk.get('speaker') or talk_info.get('speaker'),
+ 'thumbnail': player_talk.get('thumb') or talk_info.get('thumb'),
+ 'description': self._og_search_description(webpage),
+ 'subtitles': self._get_subtitles(video_id, talk_info),
+ 'formats': formats,
+ 'duration': float_or_none(talk_info.get('duration')),
+ 'view_count': int_or_none(data.get('viewed_count')),
+ 'comment_count': int_or_none(
+ try_get(data, lambda x: x['comments']['count'])),
+ 'tags': try_get(talk_info, lambda x: x['tags'], list),
+ }
+
+ def _get_subtitles(self, video_id, talk_info):
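+ # Subtitle tracks are listed per language; each is exposed both in TED's own 'ted' JSON format and as SRT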
+ sub_lang_list = {}
+ for language in try_get(
+ talk_info,
+ (lambda x: x['downloads']['languages'],
+ lambda x: x['languages']), list) or []:
+ lang_code = language.get('languageCode') or language.get('ianaCode')
+ if not lang_code:
+ continue
+ sub_lang_list[lang_code] = [
+ {
+ 'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, lang_code, ext),
+ 'ext': ext,
+ }
+ for ext in ['ted', 'srt']
+ ]
+ return sub_lang_list
+
+ def _watch_info(self, url, name):
+ webpage = self._download_webpage(url, name)
+
+ config_json = self._html_search_regex(
+ r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>',
+ webpage, 'config', default=None)
+ if not config_json:
+ embed_url = self._search_regex(
+ r"<iframe[^>]+class='pages-video-embed__video__object'[^>]+src='([^']+)'", webpage, 'embed url')
+ return self.url_result(self._proto_relative_url(embed_url))
+ config = json.loads(config_json)['config']
+ video_url = config['video']['url']
+ thumbnail = config.get('image', {}).get('url')
+
+ title = self._html_search_regex(
+ r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", webpage, 'title')
+ description = self._html_search_regex(
+ [
+ r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
+ r'(?s)<p><strong>About this talk:</strong>\s+(.*?)</p>',
+ ],
+ webpage, 'description', fatal=False)
+
+ return {
+ 'id': name,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ }
diff --git a/hypervideo_dl/extractor/tele13.py b/hypervideo_dl/extractor/tele13.py
new file mode 100644
index 0000000..a29a64b
--- /dev/null
+++ b/hypervideo_dl/extractor/tele13.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ js_to_json,
+ qualities,
+ determine_ext,
+)
+
+
+class Tele13IE(InfoExtractor):
+ _VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
+ _TESTS = [
+ {
+ 'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
+ 'md5': '4cb1fa38adcad8fea88487a078831755',
+ 'info_dict': {
+ 'id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
+ 'ext': 'mp4',
+ 'title': 'El círculo de hierro de Michelle Bachelet en su regreso a La Moneda',
+ },
+ 'params': {
+ # HTTP Error 404: Not Found
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.t13.cl/videos/mundo/tendencias/video-captan-misteriosa-bola-fuego-cielos-bangkok',
+ 'md5': '867adf6a3b3fef932c68a71d70b70946',
+ 'info_dict': {
+ 'id': 'rOoKv2OMpOw',
+ 'ext': 'mp4',
+ 'title': 'Shooting star seen on 7-Sep-2015',
+ 'description': 'md5:7292ff2a34b2f673da77da222ae77e1e',
+ 'uploader': 'Porjai Jaturongkhakun',
+ 'upload_date': '20150906',
+ 'uploader_id': 'UCnLY_3ezwNcDSC_Wc6suZxw',
+ },
+ 'add_ie': ['Youtube'],
+ }
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ setup_js = self._search_regex(
+ r"(?s)jwplayer\('player-vivo'\).setup\((\{.*?\})\)",
+ webpage, 'setup code')
+ sources = self._parse_json(self._search_regex(
+ r'sources\s*:\s*(\[[^\]]+\])', setup_js, 'sources'),
+ display_id, js_to_json)
+
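+ # Prefer HD over SD over the mobile ('Móvil') rendition; dedupe sources by URL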
+ preference = qualities(['Móvil', 'SD', 'HD'])
+ formats = []
+ urls = []
+ for f in sources:
+ format_url = f['file']
+ if format_url and format_url not in urls:
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif YoutubeIE.suitable(format_url):
+ return self.url_result(format_url, 'Youtube')
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': f.get('label'),
+ 'preference': preference(f.get('label')),
+ 'ext': ext,
+ })
+ urls.append(format_url)
+ self._sort_formats(formats)
+
+ return {
+ 'id': display_id,
+ 'title': self._search_regex(
+ r'title\s*:\s*"([^"]+)"', setup_js, 'title'),
+ 'description': self._html_search_meta(
+ 'description', webpage, 'description'),
+ 'thumbnail': self._search_regex(
+ r'image\s*:\s*"([^"]+)"', setup_js, 'thumbnail', default=None),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/tele5.py b/hypervideo_dl/extractor/tele5.py
new file mode 100644
index 0000000..3e1a7a9
--- /dev/null
+++ b/hypervideo_dl/extractor/tele5.py
@@ -0,0 +1,108 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+from .nexx import NexxIE
+from ..compat import compat_urlparse
+from ..utils import (
+ NO_DEFAULT,
+ smuggle_url,
+)
+
+
+class Tele5IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _GEO_COUNTRIES = ['DE']
+ _TESTS = [{
+ 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
+ 'info_dict': {
+ 'id': '1549416',
+ 'ext': 'mp4',
+ 'upload_date': '20180814',
+ 'timestamp': 1534290623,
+ 'title': 'Pandorum',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # jwplatform, nexx unavailable
+ 'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
+ 'info_dict': {
+ 'id': 'WJuiOlUp',
+ 'ext': 'mp4',
+ 'upload_date': '20200603',
+ 'timestamp': 1591214400,
+ 'title': 'Ghoul - Das Geheimnis des Friedhofmonsters',
+ 'description': 'md5:42002af1d887ff3d5b2b3ca1f8137d97',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [JWPlatformIE.ie_key()],
+ }, {
+ 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/video-clip/?ve_id=1609440',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/anders-ist-sevda/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]
+
+ NEXX_ID_RE = r'\d{6,}'
+ JWPLATFORM_ID_RE = r'[a-zA-Z0-9]{8}'
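+ # Purely numeric IDs (6+ digits) are Nexx video IDs; 8-character alphanumeric IDs are JWPlatform media IDs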
+
+ def nexx_result(nexx_id):
+ return self.url_result(
+ 'https://api.nexx.cloud/v3/759/videos/byid/%s' % nexx_id,
+ ie=NexxIE.ie_key(), video_id=nexx_id)
+
+ nexx_id = jwplatform_id = None
+
+ if video_id:
+ if re.match(NEXX_ID_RE, video_id):
+ return nexx_result(video_id)
+ elif re.match(JWPLATFORM_ID_RE, video_id):
+ jwplatform_id = video_id
+
+ if not nexx_id:
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ def extract_id(pattern, name, default=NO_DEFAULT):
+ return self._html_search_regex(
+ (r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](%s)' % pattern,
+ r'\s+id\s*=\s*["\']player_(%s)' % pattern,
+ r'\bdata-id\s*=\s*["\'](%s)' % pattern), webpage, name,
+ default=default)
+
+ nexx_id = extract_id(NEXX_ID_RE, 'nexx id', default=None)
+ if nexx_id:
+ return nexx_result(nexx_id)
+
+ if not jwplatform_id:
+ jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id')
+
+ return self.url_result(
+ smuggle_url(
+ 'jwplatform:%s' % jwplatform_id,
+ {'geo_countries': self._GEO_COUNTRIES}),
+ ie=JWPlatformIE.ie_key(), video_id=jwplatform_id)
diff --git a/hypervideo_dl/extractor/telebruxelles.py b/hypervideo_dl/extractor/telebruxelles.py
new file mode 100644
index 0000000..a0353fe
--- /dev/null
+++ b/hypervideo_dl/extractor/telebruxelles.py
@@ -0,0 +1,76 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class TeleBruxellesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(?:[^/]+/)*(?P<id>[^/#?]+)'
+ _TESTS = [{
+ 'url': 'http://bx1.be/news/que-risque-lauteur-dune-fausse-alerte-a-la-bombe/',
+ 'md5': 'a2a67a5b1c3e8c9d33109b902f474fd9',
+ 'info_dict': {
+ 'id': '158856',
+ 'display_id': 'que-risque-lauteur-dune-fausse-alerte-a-la-bombe',
+ 'ext': 'mp4',
+ 'title': 'Que risque l’auteur d’une fausse alerte à la bombe ?',
+ 'description': 'md5:3cf8df235d44ebc5426373050840e466',
+ },
+ }, {
+ 'url': 'http://bx1.be/sport/futsal-schaerbeek-sincline-5-3-a-thulin/',
+ 'md5': 'dfe07ecc9c153ceba8582ac912687675',
+ 'info_dict': {
+ 'id': '158433',
+ 'display_id': 'futsal-schaerbeek-sincline-5-3-a-thulin',
+ 'ext': 'mp4',
+ 'title': 'Futsal : Schaerbeek s’incline 5-3 à Thulin',
+ 'description': 'md5:fd013f1488d5e2dceb9cebe39e2d569b',
+ },
+ }, {
+ 'url': 'http://bx1.be/emission/bxenf1-gastronomie/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://bx1.be/berchem-sainte-agathe/personnel-carrefour-de-berchem-sainte-agathe-inquiet/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://bx1.be/dernier-jt/',
+ 'only_matching': True,
+ }, {
+ # live stream
+ 'url': 'https://bx1.be/lives/direct-tv/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ article_id = self._html_search_regex(
+ r'<article[^>]+\bid=["\']post-(\d+)', webpage, 'article ID', default=None)
+ title = self._html_search_regex(
+ r'<h1[^>]*>(.+?)</h1>', webpage, 'title',
+ default=None) or self._og_search_title(webpage)
+ description = self._og_search_description(webpage, default=None)
+
+ rtmp_url = self._html_search_regex(
+ r'file["\']?\s*:\s*"(r(?:tm|mt)ps?://[^/]+/(?:vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*"\.mp4|stream/live))"',
+ webpage, 'RTMP url')
+ # Yes, they have a typo in scheme name for live stream URLs (e.g.
+ # https://bx1.be/lives/direct-tv/)
+ rtmp_url = re.sub(r'^rmtp', 'rtmp', rtmp_url)
+ rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
+ formats = self._extract_wowza_formats(rtmp_url, article_id or display_id)
+ self._sort_formats(formats)
+
+ is_live = 'stream/live' in rtmp_url
+
+ return {
+ 'id': article_id or display_id,
+ 'display_id': display_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': description,
+ 'formats': formats,
+ 'is_live': is_live,
+ }
diff --git a/hypervideo_dl/extractor/telecinco.py b/hypervideo_dl/extractor/telecinco.py
new file mode 100644
index 0000000..eecd6a5
--- /dev/null
+++ b/hypervideo_dl/extractor/telecinco.py
@@ -0,0 +1,151 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+
+class TelecincoIE(InfoExtractor):
+ IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
+ _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
+ 'info_dict': {
+ 'id': '1876350223',
+ 'title': 'Bacalao con kokotxas al pil-pil',
+ 'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
+ },
+ 'playlist': [{
+ 'md5': '7ee56d665cfd241c0e6d80fd175068b0',
+ 'info_dict': {
+ 'id': 'JEA5ijCnF6p5W08A1rNKn7',
+ 'ext': 'mp4',
+ 'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido',
+ 'duration': 662,
+ },
+ }]
+ }, {
+ 'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
+ 'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a',
+ 'info_dict': {
+ 'id': 'jn24Od1zGLG4XUZcnUnZB6',
+ 'ext': 'mp4',
+ 'title': '¿Quién es este ex futbolista con el que hablan Leo Messi y Luis Suárez?',
+ 'description': 'md5:a62ecb5f1934fc787107d7b9a2262805',
+ 'duration': 79,
+ },
+ }, {
+ 'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
+ 'md5': 'eddb50291df704ce23c74821b995bcac',
+ 'info_dict': {
+ 'id': 'aywerkD2Sv1vGNqq9b85Q2',
+ 'ext': 'mp4',
+ 'title': '#DOYLACARA. Con la trata no hay trato',
+ 'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
+ 'duration': 50,
+ },
+ }, {
+ # video in opening's content
+ 'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
+ 'info_dict': {
+ 'id': '2907195140',
+ 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
+ 'description': 'md5:73f340a7320143d37ab895375b2bf13a',
+ },
+ 'playlist': [{
+ 'md5': 'adb28c37238b675dad0f042292f209a7',
+ 'info_dict': {
+ 'id': 'TpI2EttSDAReWpJ1o0NVh2',
+ 'ext': 'mp4',
+ 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
+ 'duration': 1015,
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
+ 'only_matching': True,
+ }, {
+ # ooyala video
+ 'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',
+ 'only_matching': True,
+ }]
+
+ def _parse_content(self, content, url):
+ video_id = content['dataMediaId']
+ config = self._download_json(
+ content['dataConfig'], video_id, 'Downloading config JSON')
+ title = config['info']['title']
+ services = config['services']
+ caronte = self._download_json(services['caronte'], video_id)
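+ # 'caronte' returns the stream manifest plus token material; 'cerbero' exchanges it for the CDN access query string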
+ stream = caronte['dls'][0]['stream']
+ headers = self.geo_verification_headers()
+ headers.update({
+ 'Content-Type': 'application/json;charset=UTF-8',
+ 'Origin': re.match(r'https?://[^/]+', url).group(0),
+ })
+ cdn = self._download_json(
+ caronte['cerbero'], video_id, data=json.dumps({
+ 'bbx': caronte['bbx'],
+ 'gbx': self._download_json(services['gbx'], video_id)['gbx'],
+ }).encode(), headers=headers)['tokens']['1']['cdn']
+ formats = self._extract_m3u8_formats(
+ stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
+ 'duration': int_or_none(content.get('dataDuration')),
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ article = self._parse_json(self._search_regex(
+ r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})',
+ webpage, 'article'), display_id)['article']
+ title = article.get('title')
+ description = clean_html(article.get('leadParagraph')) or ''
+ if article.get('editorialType') != 'VID':
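+ # Non-video articles: walk the opening block and body paragraphs, collecting text and embedded videos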
+ entries = []
+ body = [article.get('opening')]
+ body.extend(try_get(article, lambda x: x['body'], list) or [])
+ for p in body:
+ if not isinstance(p, dict):
+ continue
+ content = p.get('content')
+ if not content:
+ continue
+ type_ = p.get('type')
+ if type_ == 'paragraph':
+ content_str = str_or_none(content)
+ if content_str:
+ description += content_str
+ continue
+ if type_ == 'video' and isinstance(content, dict):
+ entries.append(self._parse_content(content, url))
+ return self.playlist_result(
+ entries, str_or_none(article.get('id')), title, description)
+ content = article['opening']['content']
+ info = self._parse_content(content, url)
+ info.update({
+ 'description': description,
+ })
+ return info
diff --git a/hypervideo_dl/extractor/telegraaf.py b/hypervideo_dl/extractor/telegraaf.py
new file mode 100644
index 0000000..2dc0205
--- /dev/null
+++ b/hypervideo_dl/extractor/telegraaf.py
@@ -0,0 +1,89 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class TelegraafIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/video/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.telegraaf.nl/video/734366489/historisch-scheepswrak-slaat-na-100-jaar-los',
+ 'info_dict': {
+ 'id': 'gaMItuoSeUg2',
+ 'ext': 'mp4',
+ 'title': 'Historisch scheepswrak slaat na 100 jaar los',
+ 'description': 'md5:6f53b7c4f55596722ac24d6c0ec00cfb',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 55,
+ 'timestamp': 1572805527,
+ 'upload_date': '20191103',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+
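+ # Resolve the article ID to its first embedded video ID via the site's GraphQL endpoint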
+ video_id = self._download_json(
+ 'https://www.telegraaf.nl/graphql', article_id, query={
+ 'query': '''{
+ article(uid: %s) {
+ videos {
+ videoId
+ }
+ }
+}''' % article_id,
+ })['data']['article']['videos'][0]['videoId']
+
+ item = self._download_json(
+ 'https://content.tmgvideo.nl/playlist/item=%s/playlist.json' % video_id,
+ video_id)['items'][0]
+ title = item['title']
+
+ formats = []
+ locations = item.get('locations') or {}
+ for location in locations.get('adaptive', []):
+ manifest_url = location.get('src')
+ if not manifest_url:
+ continue
+ ext = determine_ext(manifest_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ manifest_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ self.report_warning('Unknown adaptive format %s' % ext)
+ for location in locations.get('progressive', []):
+ src = try_get(location, lambda x: x['sources'][0]['src'])
+ if not src:
+ continue
+ label = location.get('label')
+ formats.append({
+ 'url': src,
+ 'width': int_or_none(location.get('width')),
+ 'height': int_or_none(location.get('height')),
+ 'format_id': 'http' + ('-%s' % label if label else ''),
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': item.get('description'),
+ 'formats': formats,
+ 'duration': int_or_none(item.get('duration')),
+ 'thumbnail': item.get('poster'),
+ 'timestamp': parse_iso8601(item.get('datecreated'), ' '),
+ }
diff --git a/hypervideo_dl/extractor/telemb.py b/hypervideo_dl/extractor/telemb.py
new file mode 100644
index 0000000..9bcac4e
--- /dev/null
+++ b/hypervideo_dl/extractor/telemb.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import remove_start
+
+
+class TeleMBIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?telemb\.be/(?P<display_id>.+?)_d_(?P<id>\d+)\.html'
+ _TESTS = [
+ {
+ 'url': 'http://www.telemb.be/mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html',
+ 'md5': 'f45ea69878516ba039835794e0f8f783',
+ 'info_dict': {
+ 'id': '13466',
+ 'display_id': 'mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-',
+ 'ext': 'mp4',
+ 'title': 'Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages',
+ 'description': 'md5:bc5225f47b17c309761c856ad4776265',
+ 'thumbnail': r're:^http://.*\.(?:jpg|png)$',
+ }
+ },
+ {
+ # non-ASCII characters in download URL
+ 'url': 'http://telemb.be/les-reportages-havre-incendie-mortel_d_13514.html',
+ 'md5': '6e9682736e5ccd4eab7f21e855350733',
+ 'info_dict': {
+ 'id': '13514',
+ 'display_id': 'les-reportages-havre-incendie-mortel',
+ 'ext': 'mp4',
+ 'title': 'Havré - Incendie mortel - Les reportages',
+ 'description': 'md5:5e54cb449acb029c2b7734e2d946bd4a',
+ 'thumbnail': r're:^http://.*\.(?:jpg|png)$',
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ formats = []
+ for video_url in re.findall(r'file\s*:\s*"([^"]+)"', webpage):
+ fmt = {
+ 'url': video_url,
+ 'format_id': video_url.split(':')[0]
+ }
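+ # RTMP sources need app and play_path split out of the URL for the RTMP downloader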
+ rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url)
+ if rtmp:
+ fmt.update({
+ 'play_path': rtmp.group('playpath'),
+ 'app': rtmp.group('app'),
+ 'player_url': 'http://p.jwpcdn.com/6/10/jwplayer.flash.swf',
+ 'page_url': 'http://www.telemb.be',
+ 'preference': -1,
+ })
+ formats.append(fmt)
+ self._sort_formats(formats)
+
+ title = remove_start(self._og_search_title(webpage), 'TéléMB : ')
+ description = self._html_search_regex(
+ r'<meta property="og:description" content="(.+?)" />',
+ webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/telequebec.py b/hypervideo_dl/extractor/telequebec.py
new file mode 100644
index 0000000..800d87b
--- /dev/null
+++ b/hypervideo_dl/extractor/telequebec.py
@@ -0,0 +1,252 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ smuggle_url,
+ try_get,
+ unified_timestamp,
+)
+
+
+class TeleQuebecBaseIE(InfoExtractor):
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
+
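+ # Wrap a Brightcove video reference, smuggling a geo hint that restricts playback to Canada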
+ @staticmethod
+ def _brightcove_result(brightcove_id, player_id, account_id='6150020952001'):
+ return {
+ '_type': 'url_transparent',
+ 'url': smuggle_url(TeleQuebecBaseIE.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, brightcove_id), {'geo_countries': ['CA']}),
+ 'ie_key': 'BrightcoveNew',
+ }
+
+
+class TeleQuebecIE(TeleQuebecBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ zonevideo\.telequebec\.tv/media|
+ coucou\.telequebec\.tv/videos
+ )/(?P<id>\d+)
+ '''
+ _TESTS = [{
+ # available till 01.01.2023
+ 'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
+ 'info_dict': {
+ 'id': '6155972771001',
+ 'ext': 'mp4',
+ 'title': 'Un petit choc et puis repart!',
+ 'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
+ 'timestamp': 1589262469,
+ 'uploader_id': '6150020952001',
+ 'upload_date': '20200512',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }, {
+ 'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout',
+ 'info_dict': {
+ 'id': '6167180337001',
+ 'ext': 'mp4',
+ 'title': 'Le soleil',
+ 'description': 'md5:64289c922a8de2abbe99c354daffde02',
+ 'uploader_id': '6150020952001',
+ 'upload_date': '20200625',
+ 'timestamp': 1593090307,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }, {
+ # no description
+ 'url': 'http://zonevideo.telequebec.tv/media/30261',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://coucou.telequebec.tv/videos/41788/idee-de-genie/l-heure-du-bain',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ media = self._download_json(
+ 'https://mnmedias.api.telequebec.tv/api/v3/media/' + media_id,
+ media_id)['media']
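+ # Pick the Brightcove source among the media's stream infos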
+ source_id = next(source_info['sourceId'] for source_info in media['streamInfos'] if source_info.get('source') == 'Brightcove')
+ info = self._brightcove_result(source_id, '22gPKdt7f')
+ product = media.get('product') or {}
+ season = product.get('season') or {}
+ info.update({
+ 'description': try_get(media, lambda x: x['descriptions'][-1]['text'], compat_str),
+ 'series': try_get(season, lambda x: x['serie']['titre']),
+ 'season': season.get('name'),
+ 'season_number': int_or_none(season.get('seasonNo')),
+ 'episode': product.get('titre'),
+ 'episode_number': int_or_none(product.get('episodeNo')),
+ })
+ return info
+
+
+class TeleQuebecSquatIE(InfoExtractor):
+ _VALID_URL = r'https://squat\.telequebec\.tv/videos/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://squat.telequebec.tv/videos/9314',
+ 'info_dict': {
+ 'id': 'd59ae78112d542e793d83cc9d3a5b530',
+ 'ext': 'mp4',
+ 'title': 'Poupeflekta',
+ 'description': 'md5:2f0718f8d2f8fece1646ee25fb7bce75',
+ 'duration': 1351,
+ 'timestamp': 1569057600,
+ 'upload_date': '20190921',
+ 'series': 'Miraculous : Les Aventures de Ladybug et Chat Noir',
+ 'season': 'Saison 3',
+ 'season_number': 3,
+ 'episode_number': 57,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://squat.api.telequebec.tv/v1/videos/%s' % video_id,
+ video_id)
+
+ media_id = video['sourceId']
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'http://zonevideo.telequebec.tv/media/%s' % media_id,
+ 'ie_key': TeleQuebecIE.ie_key(),
+ 'id': media_id,
+ 'title': video.get('titre'),
+ 'description': video.get('description'),
+ 'timestamp': unified_timestamp(video.get('datePublication')),
+ 'series': video.get('container'),
+ 'season': video.get('saison'),
+ 'season_number': int_or_none(video.get('noSaison')),
+ 'episode_number': int_or_none(video.get('episode')),
+ }
+
+
+class TeleQuebecEmissionIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ [^/]+\.telequebec\.tv/emissions/|
+ (?:www\.)?telequebec\.tv/
+ )
+ (?P<id>[^?#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente',
+ 'info_dict': {
+ 'id': '6154476028001',
+ 'ext': 'mp4',
+ 'title': 'Des soins esthétiques à 377 % d’intérêts annuels, ça vous tente?',
+ 'description': 'md5:cb4d378e073fae6cce1f87c00f84ae9f',
+ 'upload_date': '20200505',
+ 'timestamp': 1588713424,
+ 'uploader_id': '6150020952001',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }, {
+ 'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.telequebec.tv/masha-et-michka/epi059masha-et-michka-3-053-078',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.telequebec.tv/documentaire/bebes-sur-mesure/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ media_id = self._search_regex(
+ r'mediaId\s*:\s*(?P<id>\d+)', webpage, 'media id')
+
+ return self.url_result(
+ 'http://zonevideo.telequebec.tv/media/' + media_id,
+ TeleQuebecIE.ie_key())
+
+
+class TeleQuebecLiveIE(TeleQuebecBaseIE):
+ _VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)'
+ _TEST = {
+ 'url': 'http://zonevideo.telequebec.tv/endirect/',
+ 'info_dict': {
+ 'id': '6159095684001',
+ 'ext': 'mp4',
+ 'title': 're:^Télé-Québec [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ 'description': 'Canal principal de Télé-Québec',
+ 'uploader_id': '6150020952001',
+ 'timestamp': 1590439901,
+ 'upload_date': '20200525',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ return self._brightcove_result('6159095684001', 'skCsmi2Uw')
+
+
+class TeleQuebecVideoIE(TeleQuebecBaseIE):
+ _VALID_URL = r'https?://video\.telequebec\.tv/player(?:-live)?/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://video.telequebec.tv/player/31110/stream',
+ 'info_dict': {
+ 'id': '6202570652001',
+ 'ext': 'mp4',
+ 'title': 'Le coût du véhicule le plus vendu au Canada / Tous les frais liés à la procréation assistée',
+ 'description': 'md5:685a7e4c450ba777c60adb6e71e41526',
+ 'upload_date': '20201019',
+ 'timestamp': 1603115930,
+ 'uploader_id': '6101674910001',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }, {
+ 'url': 'https://video.telequebec.tv/player-live/28527',
+ 'only_matching': True,
+ }]
+
+ def _call_api(self, path, video_id):
+ return self._download_json(
+ 'http://beacon.playback.api.brightcove.com/telequebec/api/assets/' + path,
+ video_id, query={'device_layout': 'web', 'device_type': 'web'})['data']
+
+ def _real_extract(self, url):
+ asset_id = self._match_id(url)
+ asset = self._call_api(asset_id, asset_id)['asset']
+ stream = self._call_api(
+ asset_id + '/streams/' + asset['streams'][0]['id'], asset_id)['stream']
+ stream_url = stream['url']
+ account_id = try_get(
+ stream, lambda x: x['video_provider_details']['account_id']) or '6101674910001'
+ info = self._brightcove_result(stream_url, 'default', account_id)
+ info.update({
+ 'description': asset.get('long_description') or asset.get('short_description'),
+ 'series': asset.get('series_original_name'),
+ 'season_number': int_or_none(asset.get('season_number')),
+ 'episode': asset.get('original_name'),
+ 'episode_number': int_or_none(asset.get('episode_number')),
+ })
+ return info
diff --git a/hypervideo_dl/extractor/teletask.py b/hypervideo_dl/extractor/teletask.py
new file mode 100644
index 0000000..b9e2ef8
--- /dev/null
+++ b/hypervideo_dl/extractor/teletask.py
@@ -0,0 +1,53 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class TeleTaskIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tele-task\.de/archive/video/html5/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.tele-task.de/archive/video/html5/26168/',
+ 'info_dict': {
+ 'id': '26168',
+ 'title': 'Duplicate Detection',
+ },
+ 'playlist': [{
+ 'md5': '290ef69fb2792e481169c3958dbfbd57',
+ 'info_dict': {
+ 'id': '26168-speaker',
+ 'ext': 'mp4',
+ 'title': 'Duplicate Detection',
+ 'upload_date': '20141218',
+ }
+ }, {
+ 'md5': 'e1e7218c5f0e4790015a437fcf6c71b4',
+ 'info_dict': {
+ 'id': '26168-slides',
+ 'ext': 'mp4',
+ 'title': 'Duplicate Detection',
+ 'upload_date': '20141218',
+ }
+ }]
+ }
+
+ def _real_extract(self, url):
+ lecture_id = self._match_id(url)
+ webpage = self._download_webpage(url, lecture_id)
+
+ title = self._html_search_regex(
+ r'itemprop="name">([^<]+)</a>', webpage, 'title')
+ upload_date = unified_strdate(self._html_search_regex(
+ r'Date:</td><td>([^<]+)</td>', webpage, 'date', fatal=False))
+
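+        # the page embeds one <video> tag per stream; its class attribute
+        # ("speaker" or "slides") doubles as the format identifier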
+ entries = [{
+ 'id': '%s-%s' % (lecture_id, format_id),
+ 'url': video_url,
+ 'title': title,
+ 'upload_date': upload_date,
+ } for format_id, video_url in re.findall(
+ r'<video class="([^"]+)"[^>]*>\s*<source src="([^"]+)"', webpage)]
+
+ return self.playlist_result(entries, lecture_id, title)
diff --git a/hypervideo_dl/extractor/telewebion.py b/hypervideo_dl/extractor/telewebion.py
new file mode 100644
index 0000000..1207b1a
--- /dev/null
+++ b/hypervideo_dl/extractor/telewebion.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TelewebionIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?telewebion\.com/#!/episode/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.telewebion.com/#!/episode/1263668/',
+ 'info_dict': {
+ 'id': '1263668',
+ 'ext': 'mp4',
+ 'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
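+        # the token endpoint returns the bare security token as the response body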
+ secure_token = self._download_webpage(
+ 'http://m.s2.telewebion.com/op/op?action=getSecurityToken', video_id)
+ episode_details = self._download_json(
+ 'http://m.s2.telewebion.com/op/op', video_id,
+ query={'action': 'getEpisodeDetails', 'episode_id': video_id})
+
+ m3u8_url = 'http://m.s1.telewebion.com/smil/%s.m3u8?filepath=%s&m3u8=1&secure_token=%s' % (
+ video_id, episode_details['file_path'], secure_token)
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', m3u8_id='hls')
+
+ picture_paths = [
+ episode_details.get('picture_path'),
+ episode_details.get('large_picture_path'),
+ ]
+
+ thumbnails = [{
+ 'url': picture_path,
+ 'preference': idx,
+ } for idx, picture_path in enumerate(picture_paths) if picture_path is not None]
+
+ return {
+ 'id': video_id,
+ 'title': episode_details['title'],
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'view_count': episode_details.get('view_count'),
+ }
diff --git a/hypervideo_dl/extractor/tennistv.py b/hypervideo_dl/extractor/tennistv.py
new file mode 100644
index 0000000..a586f30
--- /dev/null
+++ b/hypervideo_dl/extractor/tennistv.py
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+
+from ..utils import (
+ ExtractorError,
+ unified_timestamp,
+)
+
+
+class TennisTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tennistv\.com/videos/(?P<id>[-a-z0-9]+)'
+ _TEST = {
+ 'url': 'https://www.tennistv.com/videos/indian-wells-2018-verdasco-fritz',
+ 'info_dict': {
+ 'id': 'indian-wells-2018-verdasco-fritz',
+ 'ext': 'mp4',
+ 'title': 'Fernando Verdasco v Taylor Fritz',
+ 'description': 're:^After his stunning victory.{174}$',
+ 'thumbnail': 'https://atp-prod.akamaized.net/api/images/v1/images/112831/landscape/1242/0',
+ 'timestamp': 1521017381,
+ 'upload_date': '20180314',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires email and password of a subscribed account',
+ }
+ _NETRC_MACHINE = 'tennistv'
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if not username or not password:
+ raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+
+ login_form = {
+ 'Email': username,
+ 'Password': password,
+ }
+ login_json = json.dumps(login_form).encode('utf-8')
+ headers = {
+ 'content-type': 'application/json',
+ 'Referer': 'https://www.tennistv.com/login',
+ 'Origin': 'https://www.tennistv.com',
+ }
+
+ login_result = self._download_json(
+ 'https://www.tennistv.com/api/users/v1/login', None,
+ note='Logging in',
+ errnote='Login failed (wrong password?)',
+ headers=headers,
+ data=login_json)
+
+ if login_result['error']['errorCode']:
+ raise ExtractorError('Login failed, %s said: %r' % (self.IE_NAME, login_result['error']['errorMessage']))
+
+ if login_result['entitlement'] != 'SUBSCRIBED':
+ self.report_warning('%s may not be subscribed to %s.' % (username, self.IE_NAME))
+
+ self._session_token = login_result['sessionToken']
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ internal_id = self._search_regex(r'video=([0-9]+)', webpage, 'internal video id')
+
+ headers = {
+ 'Origin': 'https://www.tennistv.com',
+ 'authorization': 'ATP %s' % self._session_token,
+ 'content-type': 'application/json',
+ 'Referer': url,
+ }
+ check_data = {
+ 'videoID': internal_id,
+ 'VideoUrlType': 'HLSV3',
+ }
+ check_json = json.dumps(check_data).encode('utf-8')
+ check_result = self._download_json(
+ 'https://www.tennistv.com/api/users/v1/entitlementchecknondiva',
+ video_id, note='Checking video authorization', headers=headers, data=check_json)
+ formats = self._extract_m3u8_formats(check_result['contentUrl'], video_id, ext='mp4')
+
+ vdata_url = 'https://www.tennistv.com/api/channels/v1/de/none/video/%s' % video_id
+ vdata = self._download_json(vdata_url, video_id)
+
+ timestamp = unified_timestamp(vdata['timestamp'])
+ thumbnail = vdata['video']['thumbnailUrl']
+ description = vdata['displayText']['description']
+ title = vdata['video']['title']
+
+ series = vdata['tour']
+ venue = vdata['displayText']['venue']
+ round_str = vdata['seo']['round']
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'series': series,
+ 'season': venue,
+ 'episode': round_str,
+ }
diff --git a/hypervideo_dl/extractor/tenplay.py b/hypervideo_dl/extractor/tenplay.py
new file mode 100644
index 0000000..cd30d57
--- /dev/null
+++ b/hypervideo_dl/extractor/tenplay.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ HEADRequest,
+ parse_age_limit,
+ parse_iso8601,
+ # smuggle_url,
+)
+
+
+class TenPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
+ _TESTS = [{
+ 'url': 'https://10play.com.au/masterchef/episodes/season-1/masterchef-s1-ep-1/tpv190718kwzga',
+ 'info_dict': {
+ 'id': '6060533435001',
+ 'ext': 'mp4',
+ 'title': 'MasterChef - S1 Ep. 1',
+ 'description': 'md5:4fe7b78e28af8f2d900cd20d900ef95c',
+ 'age_limit': 10,
+ 'timestamp': 1240828200,
+ 'upload_date': '20090427',
+ 'uploader_id': '2199827728001',
+ },
+ 'params': {
+ # 'format': 'bestvideo',
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
+ 'only_matching': True,
+ }]
+ # BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
+ _GEO_BYPASS = False
+ _FASTLY_URL_TEMPL = 'https://10-selector.global.ssl.fastly.net/s/kYEXFC/media/%s?mbr=true&manifest=m3u&format=redirect'
+
+ def _real_extract(self, url):
+ content_id = self._match_id(url)
+ data = self._download_json(
+ 'https://10play.com.au/api/video/' + content_id, content_id)
+ video = data.get('video') or {}
+ metadata = data.get('metaData') or {}
+ brightcove_id = video.get('videoId') or metadata['showContentVideoId']
+ # brightcove_url = smuggle_url(
+ # self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ # {'geo_countries': ['AU']})
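+        # resolve the Fastly selector with a HEAD request; the final redirect
+        # URL is the actual manifest (or a '10play-not-in-oz' geo-block page)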
+ m3u8_url = self._request_webpage(HEADRequest(
+ self._FASTLY_URL_TEMPL % brightcove_id), brightcove_id).geturl()
+ if '10play-not-in-oz' in m3u8_url:
+ self.raise_geo_restricted(countries=['AU'])
+ formats = self._extract_m3u8_formats(m3u8_url, brightcove_id, 'mp4')
+ self._sort_formats(formats)
+
+ return {
+ # '_type': 'url_transparent',
+ # 'url': brightcove_url,
+ 'formats': formats,
+ 'id': brightcove_id,
+ 'title': video.get('title') or metadata.get('pageContentName') or metadata['showContentName'],
+ 'description': video.get('description'),
+ 'age_limit': parse_age_limit(video.get('showRatingClassification') or metadata.get('showProgramClassification')),
+ 'series': metadata.get('showName'),
+ 'season': metadata.get('showContentSeason'),
+ 'timestamp': parse_iso8601(metadata.get('contentPublishDate') or metadata.get('pageContentPublishDate')),
+ 'thumbnail': video.get('poster'),
+ 'uploader_id': '2199827728001',
+ # 'ie_key': 'BrightcoveNew',
+ }
diff --git a/hypervideo_dl/extractor/testurl.py b/hypervideo_dl/extractor/testurl.py
new file mode 100644
index 0000000..84a14a0
--- /dev/null
+++ b/hypervideo_dl/extractor/testurl.py
@@ -0,0 +1,64 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class TestURLIE(InfoExtractor):
+ """ Allows addressing of the test cases as test:yout.*be_1 """
+
+ IE_DESC = False # Do not list
+ _VALID_URL = r'test(?:url)?:(?P<id>(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?)$'
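+    # "test:youtube" picks the first test case of the matching extractor,
+    # "test:youtube_1" its second (the numeric suffix is a zero-based index)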
+
+ def _real_extract(self, url):
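+        # imported lazily to avoid a circular import at module load time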
+ from ..extractor import gen_extractors
+
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ extractor_id = mobj.group('extractor')
+ all_extractors = gen_extractors()
+
+ rex = re.compile(extractor_id, flags=re.IGNORECASE)
+ matching_extractors = [
+ e for e in all_extractors if rex.search(e.IE_NAME)]
+
+ if len(matching_extractors) == 0:
+ raise ExtractorError(
+ 'No extractors matching %r found' % extractor_id,
+ expected=True)
+ elif len(matching_extractors) > 1:
+ # Is it obvious which one to pick?
+ try:
+ extractor = next(
+ ie for ie in matching_extractors
+ if ie.IE_NAME.lower() == extractor_id.lower())
+ except StopIteration:
+ raise ExtractorError(
+ ('Found multiple matching extractors: %s' %
+ ' '.join(ie.IE_NAME for ie in matching_extractors)),
+ expected=True)
+ else:
+ extractor = matching_extractors[0]
+
+ num_str = mobj.group('num')
+ num = int(num_str) if num_str else 0
+
+ testcases = []
+ t = getattr(extractor, '_TEST', None)
+ if t:
+ testcases.append(t)
+ testcases.extend(getattr(extractor, '_TESTS', []))
+
+ try:
+ tc = testcases[num]
+ except IndexError:
+ raise ExtractorError(
+ ('Test case %d not found, got only %d tests' %
+ (num, len(testcases))),
+ expected=True)
+
+ self.to_screen('Test URL: %s' % tc['url'])
+
+ return self.url_result(tc['url'], video_id=video_id)
diff --git a/hypervideo_dl/extractor/tf1.py b/hypervideo_dl/extractor/tf1.py
new file mode 100644
index 0000000..23c2808
--- /dev/null
+++ b/hypervideo_dl/extractor/tf1.py
@@ -0,0 +1,87 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class TF1IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tf1\.fr/[^/]+/(?P<program_slug>[^/]+)/videos/(?P<id>[^/?&#]+)\.html'
+ _TESTS = [{
+ 'url': 'https://www.tf1.fr/tmc/quotidien-avec-yann-barthes/videos/quotidien-premiere-partie-11-juin-2019.html',
+ 'info_dict': {
+ 'id': '13641379',
+ 'ext': 'mp4',
+ 'title': 'md5:f392bc52245dc5ad43771650c96fb620',
+ 'description': 'md5:a02cdb217141fb2d469d6216339b052f',
+ 'upload_date': '20190611',
+ 'timestamp': 1560273989,
+ 'duration': 1738,
+ 'series': 'Quotidien avec Yann Barthès',
+ 'tags': ['intégrale', 'quotidien', 'Replay'],
+ },
+ 'params': {
+ # Sometimes wat serves the whole file with the --test option
+ 'skip_download': True,
+ 'format': 'bestvideo',
+ },
+ }, {
+ 'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ program_slug, slug = re.match(self._VALID_URL, url).groups()
+ video = self._download_json(
+ 'https://www.tf1.fr/graphql/web', slug, query={
+ 'id': '9b80783950b85247541dd1d851f9cc7fa36574af015621f853ab111a679ce26f',
+ 'variables': json.dumps({
+ 'programSlug': program_slug,
+ 'slug': slug,
+ })
+ })['data']['videoBySlug']
+ wat_id = video['streamId']
+
+ tags = []
+ for tag in (video.get('tags') or []):
+ label = tag.get('label')
+ if not label:
+ continue
+ tags.append(label)
+
+ decoration = video.get('decoration') or {}
+
+ thumbnails = []
+ for source in (try_get(decoration, lambda x: x['image']['sources'], list) or []):
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ thumbnails.append({
+ 'url': source_url,
+ 'width': int_or_none(source.get('width')),
+ })
+
+ return {
+ '_type': 'url_transparent',
+ 'id': wat_id,
+ 'url': 'wat:' + wat_id,
+ 'title': video.get('title'),
+ 'thumbnails': thumbnails,
+ 'description': decoration.get('description'),
+ 'timestamp': parse_iso8601(video.get('date')),
+ 'duration': int_or_none(try_get(video, lambda x: x['publicPlayingInfos']['duration'])),
+ 'tags': tags,
+ 'series': decoration.get('programLabel'),
+ 'season_number': int_or_none(video.get('season')),
+ 'episode_number': int_or_none(video.get('episode')),
+ }
diff --git a/hypervideo_dl/extractor/tfo.py b/hypervideo_dl/extractor/tfo.py
new file mode 100644
index 0000000..0631cb7
--- /dev/null
+++ b/hypervideo_dl/extractor/tfo.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ HEADRequest,
+ ExtractorError,
+ int_or_none,
+ clean_html,
+)
+
+
+class TFOIE(InfoExtractor):
+ _GEO_COUNTRIES = ['CA']
+ _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon',
+ 'md5': 'cafbe4f47a8dae0ca0159937878100d6',
+ 'info_dict': {
+ 'id': '7da3d50e495c406b8fc0b997659cc075',
+ 'ext': 'mp4',
+ 'title': 'Video Game Hackathon',
+ 'description': 'md5:558afeba217c6c8d96c60e5421795c07',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ self._request_webpage(HEADRequest('http://www.tfo.org/'), video_id)
+ infos = self._download_json(
+ 'http://www.tfo.org/api/web/video/get_infos', video_id, data=json.dumps({
+ 'product_id': video_id,
+ }).encode(), headers={
+ 'X-tfo-session': self._get_cookies('http://www.tfo.org/')['tfo-session'].value,
+ })
+ if infos.get('success') == 0:
+ if infos.get('code') == 'ErrGeoBlocked':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(infos['msg'])), expected=True)
+ video_data = infos['data']
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': 'limelight:media:' + video_data['llid'],
+ 'title': video_data['title'],
+ 'description': video_data.get('description'),
+ 'series': video_data.get('collection'),
+ 'season_number': int_or_none(video_data.get('season')),
+ 'episode_number': int_or_none(video_data.get('episode')),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'ie_key': 'LimelightMedia',
+ }
diff --git a/hypervideo_dl/extractor/theintercept.py b/hypervideo_dl/extractor/theintercept.py
new file mode 100644
index 0000000..f23b587
--- /dev/null
+++ b/hypervideo_dl/extractor/theintercept.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ parse_iso8601,
+ int_or_none,
+ ExtractorError,
+)
+
+
+class TheInterceptIE(InfoExtractor):
+ _VALID_URL = r'https?://theintercept\.com/fieldofvision/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/',
+ 'md5': '145f28b41d44aab2f87c0a4ac8ec95bd',
+ 'info_dict': {
+ 'id': '46214',
+ 'ext': 'mp4',
+ 'title': '#ThisIsACoup – Episode Four: Surrender or Die',
+ 'description': 'md5:74dd27f0e2fbd50817829f97eaa33140',
+ 'timestamp': 1450429239,
+ 'upload_date': '20151218',
+ 'comment_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ json_data = self._parse_json(self._search_regex(
+ r'initialStoreTree\s*=\s*(?P<json_data>{.+})', webpage,
+ 'initialStoreTree'), display_id)
+
+ for post in json_data['resources']['posts'].values():
+ if post['slug'] == display_id:
+ return {
+ '_type': 'url_transparent',
+ 'url': 'jwplatform:%s' % post['fov_videoid'],
+ 'id': compat_str(post['ID']),
+ 'display_id': display_id,
+ 'title': post['title'],
+ 'description': post.get('excerpt'),
+ 'timestamp': parse_iso8601(post.get('date')),
+ 'comment_count': int_or_none(post.get('comments_number')),
+ }
+ raise ExtractorError('Unable to find the current post')
diff --git a/hypervideo_dl/extractor/theplatform.py b/hypervideo_dl/extractor/theplatform.py
new file mode 100644
index 0000000..adfe11e
--- /dev/null
+++ b/hypervideo_dl/extractor/theplatform.py
@@ -0,0 +1,414 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import time
+import hmac
+import binascii
+import hashlib
+
+
+from .once import OnceIE
+from .adobepass import AdobePassIE
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ sanitized_Request,
+ unsmuggle_url,
+ update_url_query,
+ xpath_with_ns,
+ mimetype2ext,
+ find_xpath_attr,
+)
+
+default_ns = 'http://www.w3.org/2005/SMIL21/Language'
+_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
+
+
+class ThePlatformBaseIE(OnceIE):
+ _TP_TLD = 'com'
+
+ def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
+ meta = self._download_xml(
+ smil_url, video_id, note=note, query={'format': 'SMIL'},
+ headers=self.geo_verification_headers())
+ error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
+ if error_element is not None:
+ exception = find_xpath_attr(
+ error_element, _x('.//smil:param'), 'name', 'exception')
+ if exception is not None:
+ if exception.get('value') == 'GeoLocationBlocked':
+ self.raise_geo_restricted(error_element.attrib['abstract'])
+ elif error_element.attrib['src'].startswith(
+ 'http://link.theplatform.%s/s/errorFiles/Unavailable.'
+ % self._TP_TLD):
+ raise ExtractorError(
+ error_element.attrib['abstract'], expected=True)
+
+ smil_formats = self._parse_smil_formats(
+ meta, smil_url, video_id, namespace=default_ns,
+            # these f4m parameters come from syfy.com; other sites may use
+            # different ones, but these also work for nbc.com
+ f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
+ transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))
+
+ formats = []
+ for _format in smil_formats:
+ if OnceIE.suitable(_format['url']):
+ formats.extend(self._extract_once_formats(_format['url']))
+ else:
+ media_url = _format['url']
+ if determine_ext(media_url) == 'm3u8':
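+                    # forward the Akamai hdnea2 auth cookie as an hdnea3 query
+                    # parameter so the protected HLS URL stays authorized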
+ hdnea2 = self._get_cookies(media_url).get('hdnea2')
+ if hdnea2:
+ _format['url'] = update_url_query(media_url, {'hdnea3': hdnea2.value})
+
+ formats.append(_format)
+
+ subtitles = self._parse_smil_subtitles(meta, default_ns)
+
+ return formats, subtitles
+
+ def _download_theplatform_metadata(self, path, video_id):
+ info_url = 'http://link.theplatform.%s/s/%s?format=preview' % (self._TP_TLD, path)
+ return self._download_json(info_url, video_id)
+
+ def _parse_theplatform_metadata(self, info):
+ subtitles = {}
+ captions = info.get('captions')
+ if isinstance(captions, list):
+ for caption in captions:
+ lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
+ subtitles.setdefault(lang, []).append({
+ 'ext': mimetype2ext(mime),
+ 'url': src,
+ })
+
+ duration = info.get('duration')
+ tp_chapters = info.get('chapters', [])
+ chapters = []
+ if tp_chapters:
+ def _add_chapter(start_time, end_time):
+ start_time = float_or_none(start_time, 1000)
+ end_time = float_or_none(end_time, 1000)
+ if start_time is None or end_time is None:
+ return
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ })
+
+ for chapter in tp_chapters[:-1]:
+ _add_chapter(chapter.get('startTime'), chapter.get('endTime'))
+ _add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
+
+ return {
+ 'title': info['title'],
+ 'subtitles': subtitles,
+ 'description': info['description'],
+ 'thumbnail': info['defaultThumbnailUrl'],
+ 'duration': float_or_none(duration, 1000),
+ 'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
+ 'uploader': info.get('billingCode'),
+ 'chapters': chapters,
+ }
+
+ def _extract_theplatform_metadata(self, path, video_id):
+ info = self._download_theplatform_metadata(path, video_id)
+ return self._parse_theplatform_metadata(info)
+
+
+class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
+ _VALID_URL = r'''(?x)
+ (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
+ (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
+ |theplatform:)(?P<id>[^/\?&]+)'''
+
+ _TESTS = [{
+ # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
+ 'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
+ 'info_dict': {
+ 'id': 'e9I_cZgTgIPd',
+ 'ext': 'flv',
+ 'title': 'Blackberry\'s big, bold Z30',
+ 'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
+ 'duration': 247,
+ 'timestamp': 1383239700,
+ 'upload_date': '20131031',
+ 'uploader': 'CBSI-NEW',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ }, {
+ # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
+ 'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
+ 'info_dict': {
+ 'id': '22d_qsQ6MIRT',
+ 'ext': 'flv',
+ 'description': 'md5:ac330c9258c04f9d7512cf26b9595409',
+ 'title': 'Tesla Model S: A second step towards a cleaner motoring future',
+ 'timestamp': 1426176191,
+ 'upload_date': '20150312',
+ 'uploader': 'CBSI-NEW',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD',
+ 'info_dict': {
+ 'id': 'yMBg9E8KFxZD',
+ 'ext': 'mp4',
+ 'description': 'md5:644ad9188d655b742f942bf2e06b002d',
+ 'title': 'HIGHLIGHTS: USA bag first ever series Cup win',
+ 'uploader': 'EGSM',
+ }
+ }, {
+ 'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
+ 'md5': 'fb96bb3d85118930a5b055783a3bd992',
+ 'info_dict': {
+ 'id': 'tdy_or_siri_150701',
+ 'ext': 'mp4',
+ 'title': 'iPhone Siri’s sassy response to a math question has people talking',
+ 'description': 'md5:a565d1deadd5086f3331d57298ec6333',
+ 'duration': 83.0,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1435752600,
+ 'upload_date': '20150701',
+ 'uploader': 'NBCU-NEWS',
+ },
+ }, {
+ # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
+ # geo-restricted (US), HLS encrypted with AES-128
+ 'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def _extract_urls(cls, webpage):
+ m = re.search(
+ r'''(?x)
+ <meta\s+
+ property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
+ content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2
+ ''', webpage)
+ if m:
+ return [m.group('url')]
+
+        # embedded player URLs may contain stray whitespace, which has to be
+        # stripped out: https://github.com/ytdl-org/youtube-dl/issues/12044
+ matches = re.findall(
+ r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
+ if matches:
+ return [re.sub(r'\s', '', list(zip(*matches))[1][0])]
+
+ @staticmethod
+ def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
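+        # the generated "sig" parameter is laid out as: flags (whether the
+        # query string is covered), hex expiry timestamp, an HMAC-SHA1 over
+        # those plus the relative path, and the hex-encoded secret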
+ flags = '10' if include_qs else '00'
+ expiration_date = '%x' % (int(time.time()) + life)
+
+        def str_to_hex(str_data):
+            return binascii.b2a_hex(str_data.encode('ascii')).decode('ascii')
+
+        def hex_to_bytes(hex_data):
+            return binascii.a2b_hex(hex_data.encode('ascii'))
+
+ relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1)
+ clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
+ checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
+ sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
+ return '%s&sig=%s' % (url, sig)
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
+
+ mobj = re.match(self._VALID_URL, url)
+ provider_id = mobj.group('provider_id')
+ video_id = mobj.group('id')
+
+ if not provider_id:
+ provider_id = 'dJ5BDC'
+
+ path = provider_id + '/'
+ if mobj.group('media'):
+ path += mobj.group('media')
+ path += video_id
+
+ qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ if 'guid' in qs_dict:
+ webpage = self._download_webpage(url, video_id)
+ scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage)
+ feed_id = None
+            # The feed id is usually found in the last script tag. There is no
+            # reliable pattern to the relevant script's filename, so try them
+            # one by one, starting from the end.
+ for script in reversed(scripts):
+ feed_script = self._download_webpage(
+ self._proto_relative_url(script, 'http:'),
+ video_id, 'Downloading feed script')
+ feed_id = self._search_regex(
+ r'defaultFeedId\s*:\s*"([^"]+)"', feed_script,
+ 'default feed id', default=None)
+ if feed_id is not None:
+ break
+ if feed_id is None:
+ raise ExtractorError('Unable to find feed id')
+ return self.url_result('http://feed.theplatform.com/f/%s/%s?byGuid=%s' % (
+ provider_id, feed_id, qs_dict['guid'][0]))
+
+ if smuggled_data.get('force_smil_url', False):
+ smil_url = url
+ # Explicitly specified SMIL (see https://github.com/ytdl-org/youtube-dl/issues/7385)
+ elif '/guid/' in url:
+ headers = {}
+ source_url = smuggled_data.get('source_url')
+ if source_url:
+ headers['Referer'] = source_url
+ request = sanitized_Request(url, headers=headers)
+ webpage = self._download_webpage(request, video_id)
+ smil_url = self._search_regex(
+ r'<link[^>]+href=(["\'])(?P<url>.+?)\1[^>]+type=["\']application/smil\+xml',
+ webpage, 'smil url', group='url')
+ path = self._search_regex(
+ r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path')
+            smil_url += ('?' if '?' not in smil_url else '&') + 'formats=m3u,mpeg4'
+ elif mobj.group('config'):
+ config_url = url + '&form=json'
+ config_url = config_url.replace('swf/', 'config/')
+ config_url = config_url.replace('onsite/', 'onsite/config/')
+ config = self._download_json(config_url, video_id, 'Downloading config')
+ if 'releaseUrl' in config:
+ release_url = config['releaseUrl']
+ else:
+ release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
+ smil_url = release_url + '&formats=MPEG4&manifest=f4m'
+ else:
+ smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
+
+ sig = smuggled_data.get('sig')
+ if sig:
+ smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
+
+ formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
+ self._sort_formats(formats)
+
+ ret = self._extract_theplatform_metadata(path, video_id)
+ combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
+ ret.update({
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': combined_subtitles,
+ })
+
+ return ret
+
+
+class ThePlatformFeedIE(ThePlatformBaseIE):
+ _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s'
+ _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[^&]+))'
+ _TESTS = [{
+ # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
+ 'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
+ 'md5': '6e32495b5073ab414471b615c5ded394',
+ 'info_dict': {
+ 'id': 'n_hardball_5biden_140207',
+ 'ext': 'mp4',
+ 'title': 'The Biden factor: will Joe run in 2016?',
+ 'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20140208',
+ 'timestamp': 1391824260,
+ 'duration': 467.0,
+ 'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
+ 'uploader': 'NBCU-NEWS',
+ },
+ }, {
+ 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byGuid=nn_netcast_180306.Copy.01',
+ 'only_matching': True,
+ }]
+
+ def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
+ real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
+ entry = self._download_json(real_url, video_id)['entries'][0]
+        main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (
+            provider_id, account_id, entry['guid']) if account_id else entry.get('plmedia$publicUrl')
+
+ formats = []
+ subtitles = {}
+ first_video_id = None
+ duration = None
+ asset_types = []
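+        # each media$content item lists one or more asset types; every asset
+        # type requires its own SMIL request to enumerate its formats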
+ for item in entry['media$content']:
+ smil_url = item['plfile$url']
+ cur_video_id = ThePlatformIE._match_id(smil_url)
+ if first_video_id is None:
+ first_video_id = cur_video_id
+ duration = float_or_none(item.get('plfile$duration'))
+ file_asset_types = item.get('plfile$assetTypes') or compat_parse_qs(compat_urllib_parse_urlparse(smil_url).query)['assetTypes']
+ for asset_type in file_asset_types:
+ if asset_type in asset_types:
+ continue
+ asset_types.append(asset_type)
+ query = {
+ 'mbr': 'true',
+ 'formats': item['plfile$format'],
+ 'assetTypes': asset_type,
+ }
+ if asset_type in asset_types_query:
+ query.update(asset_types_query[asset_type])
+ cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
+ main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
+ formats.extend(cur_formats)
+ subtitles = self._merge_subtitles(subtitles, cur_subtitles)
+
+ self._sort_formats(formats)
+
+ thumbnails = [{
+ 'url': thumbnail['plfile$url'],
+ 'width': int_or_none(thumbnail.get('plfile$width')),
+ 'height': int_or_none(thumbnail.get('plfile$height')),
+ } for thumbnail in entry.get('media$thumbnails', [])]
+
+ timestamp = int_or_none(entry.get('media$availableDate'), scale=1000)
+ categories = [item['media$name'] for item in entry.get('media$categories', [])]
+
+ ret = self._extract_theplatform_metadata('%s/%s' % (provider_id, first_video_id), video_id)
+ subtitles = self._merge_subtitles(subtitles, ret['subtitles'])
+ ret.update({
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'categories': categories,
+ })
+ if custom_fields:
+ ret.update(custom_fields(entry))
+
+ return ret
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ video_id = mobj.group('id')
+ provider_id = mobj.group('provider_id')
+ feed_id = mobj.group('feed_id')
+ filter_query = mobj.group('filter')
+
+ return self._extract_feed_info(provider_id, feed_id, filter_query, video_id)
diff --git a/hypervideo_dl/extractor/thescene.py b/hypervideo_dl/extractor/thescene.py
new file mode 100644
index 0000000..cd64235
--- /dev/null
+++ b/hypervideo_dl/extractor/thescene.py
@@ -0,0 +1,44 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..compat import compat_urlparse
+
+
+class TheSceneIE(InfoExtractor):
+ _VALID_URL = r'https?://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)'
+
+ _TEST = {
+ 'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear',
+ 'info_dict': {
+ 'id': '520e8faac2b4c00e3c6e5f43',
+ 'ext': 'mp4',
+ 'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear',
+ 'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear',
+ 'duration': 127,
+ 'series': 'Style.com Fashion Shows',
+ 'season': 'Ready To Wear Spring 2013',
+ 'tags': list,
+ 'categories': list,
+ 'upload_date': '20120913',
+ 'timestamp': 1347512400,
+ 'uploader': 'vogue',
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ player_url = compat_urlparse.urljoin(
+ url,
+ self._html_search_regex(
+ r'id=\'js-player-script\'[^>]+src=\'(.+?)\'', webpage, 'player url'))
+
+ return {
+ '_type': 'url_transparent',
+ 'display_id': display_id,
+ 'url': player_url,
+ 'ie_key': 'CondeNast',
+ }
diff --git a/hypervideo_dl/extractor/thestar.py b/hypervideo_dl/extractor/thestar.py
new file mode 100644
index 0000000..c3f1188
--- /dev/null
+++ b/hypervideo_dl/extractor/thestar.py
@@ -0,0 +1,36 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TheStarIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?thestar\.com/(?:[^/]+/)*(?P<id>.+)\.html'
+ _TEST = {
+ 'url': 'http://www.thestar.com/life/2016/02/01/mankind-why-this-woman-started-a-men-s-skincare-line.html',
+ 'md5': '2c62dd4db2027e35579fefb97a8b6554',
+ 'info_dict': {
+ 'id': '4732393888001',
+ 'ext': 'mp4',
+ 'title': 'Mankind: Why this woman started a men\'s skin care line',
+ 'description': 'Robert Cribb talks to Young Lee, the founder of Uncle Peter\'s MAN.',
+ 'uploader_id': '794267642001',
+ 'timestamp': 1454353482,
+ 'upload_date': '20160201',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/794267642001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ brightcove_id = self._search_regex(
+ r'mainartBrightcoveVideoId["\']?\s*:\s*["\']?(\d+)',
+ webpage, 'brightcove id')
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ 'BrightcoveNew', brightcove_id)
diff --git a/hypervideo_dl/extractor/thesun.py b/hypervideo_dl/extractor/thesun.py
new file mode 100644
index 0000000..15d4a69
--- /dev/null
+++ b/hypervideo_dl/extractor/thesun.py
@@ -0,0 +1,38 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import extract_attributes
+
+
+class TheSunIE(InfoExtractor):
+ _VALID_URL = r'https://(?:www\.)?thesun\.co\.uk/[^/]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
+ 'info_dict': {
+ 'id': '2261604',
+ 'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
+ },
+ 'playlist_count': 2,
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, article_id)
+
+ entries = []
+ for video in re.findall(
+ r'<video[^>]+data-video-id-pending=[^>]+>',
+ webpage):
+ attrs = extract_attributes(video)
+ video_id = attrs['data-video-id-pending']
+ account_id = attrs.get('data-account', '5067014667001')
+ entries.append(self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id),
+ 'BrightcoveNew', video_id))
+
+ return self.playlist_result(
+ entries, article_id, self._og_search_title(webpage, fatal=False))
diff --git a/hypervideo_dl/extractor/theweatherchannel.py b/hypervideo_dl/extractor/theweatherchannel.py
new file mode 100644
index 0000000..b2a8c37
--- /dev/null
+++ b/hypervideo_dl/extractor/theweatherchannel.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .theplatform import ThePlatformIE
+from ..utils import (
+ determine_ext,
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class TheWeatherChannelIE(ThePlatformIE):
+ _VALID_URL = r'https?://(?:www\.)?weather\.com(?P<asset_name>(?:/(?P<locale>[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P<id>[^/?#]+))'
+ _TESTS = [{
+ 'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock',
+ 'md5': 'c4cbe74c9c17c5676b704b950b73dd92',
+ 'info_dict': {
+ 'id': 'cc82397e-cc3f-4d11-9390-a785add090e8',
+ 'ext': 'mp4',
+ 'title': 'Ice Climber Is In For A Shock',
+ 'description': 'md5:55606ce1378d4c72e6545e160c9d9695',
+ 'uploader': 'TWC - Digital (No Distro)',
+ 'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c',
+ 'upload_date': '20160720',
+ 'timestamp': 1469018835,
+ }
+ }, {
+ 'url': 'https://weather.com/en-CA/international/videos/video/unidentified-object-falls-from-sky-in-india',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ asset_name, locale, display_id = re.match(self._VALID_URL, url).groups()
+ if not locale:
+ locale = 'en-US'
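+        # redux-dal accepts a batch of named queries; request the CMS asset
+        # record matching this page's asset name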
+ video_data = list(self._download_json(
+ 'https://weather.com/api/v1/p/redux-dal', display_id, data=json.dumps([{
+ 'name': 'getCMSAssetsUrlConfig',
+ 'params': {
+ 'language': locale.replace('-', '_'),
+ 'query': {
+ 'assetName': {
+ '$in': asset_name,
+ },
+ },
+ }
+ }]).encode(), headers={
+ 'Content-Type': 'application/json',
+ })['dal']['getCMSAssetsUrlConfig'].values())[0]['data'][0]
+ video_id = video_data['id']
+ seo_meta = video_data.get('seometa', {})
+ title = video_data.get('title') or seo_meta['title']
+
+ urls = []
+ thumbnails = []
+ formats = []
+        for variant_id, variant_url in (video_data.get('variants') or {}).items():
+ variant_url = variant_url.strip()
+ if not variant_url or variant_url in urls:
+ continue
+ urls.append(variant_url)
+ ext = determine_ext(variant_url)
+ if ext == 'jpg':
+ thumbnails.append({
+ 'url': variant_url,
+ 'id': variant_id,
+ })
+ elif ThePlatformIE.suitable(variant_url):
+ tp_formats, _ = self._extract_theplatform_smil(variant_url, video_id)
+ formats.extend(tp_formats)
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ variant_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=variant_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ variant_url, video_id, f4m_id=variant_id, fatal=False))
+ else:
+ formats.append({
+ 'url': variant_url,
+ 'format_id': variant_id,
+ })
+ self._sort_formats(formats)
+
+ cc_url = video_data.get('cc_url')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video_data.get('description') or seo_meta.get('description') or seo_meta.get('og:description'),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'uploader': video_data.get('providername'),
+ 'uploader_id': video_data.get('providerid'),
+ 'timestamp': parse_iso8601(video_data.get('publishdate')),
+ 'subtitles': {locale[:2]: [{'url': cc_url}]} if cc_url else None,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/thisamericanlife.py b/hypervideo_dl/extractor/thisamericanlife.py
new file mode 100644
index 0000000..91e45f2
--- /dev/null
+++ b/hypervideo_dl/extractor/thisamericanlife.py
@@ -0,0 +1,40 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class ThisAmericanLifeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?thisamericanlife\.org/(?:radio-archives/episode/|play_full\.php\?play=)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.thisamericanlife.org/radio-archives/episode/487/harper-high-school-part-one',
+ 'md5': '8f7d2da8926298fdfca2ee37764c11ce',
+ 'info_dict': {
+ 'id': '487',
+ 'ext': 'm4a',
+ 'title': '487: Harper High School, Part One',
+ 'description': 'md5:ee40bdf3fb96174a9027f76dbecea655',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.thisamericanlife.org/play_full.php?play=487',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://www.thisamericanlife.org/radio-archives/episode/%s' % video_id, video_id)
+
+ return {
+ 'id': video_id,
+ 'url': 'http://stream.thisamericanlife.org/{0}/stream/{0}_64k.m3u8'.format(video_id),
+ 'protocol': 'm3u8_native',
+ 'ext': 'm4a',
+ 'acodec': 'aac',
+ 'vcodec': 'none',
+ 'abr': 64,
+ 'title': self._html_search_meta(r'twitter:title', webpage, 'title', fatal=True),
+ 'description': self._html_search_meta(r'description', webpage, 'description'),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
diff --git a/hypervideo_dl/extractor/thisav.py b/hypervideo_dl/extractor/thisav.py
new file mode 100644
index 0000000..dc3dd03
--- /dev/null
+++ b/hypervideo_dl/extractor/thisav.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import remove_end
+
+
+class ThisAVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
+ _TESTS = [{
+ # jwplayer
+ 'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
+ 'md5': '0480f1ef3932d901f0e0e719f188f19b',
+ 'info_dict': {
+ 'id': '47734',
+ 'ext': 'flv',
+ 'title': '高樹マリア - Just fit',
+ 'uploader': 'dj7970',
+ 'uploader_id': 'dj7970'
+ }
+ }, {
+ # html5 media
+ 'url': 'http://www.thisav.com/video/242352/nerdy-18yo-big-ass-tattoos-and-glasses.html',
+ 'md5': 'ba90c076bd0f80203679e5b60bf523ee',
+ 'info_dict': {
+ 'id': '242352',
+ 'ext': 'mp4',
+ 'title': 'Nerdy 18yo Big Ass Tattoos and Glasses',
+ 'uploader': 'cybersluts',
+ 'uploader_id': 'cybersluts',
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ video_id = mobj.group('id')
+ webpage = self._download_webpage(url, video_id)
+ title = remove_end(self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title'),
+ ' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站')
+ video_url = self._html_search_regex(
+ r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None)
+ if video_url:
+ info_dict = {
+ 'formats': [{
+ 'url': video_url,
+ }],
+ }
+ else:
+ entries = self._parse_html5_media_entries(url, webpage, video_id)
+ if entries:
+ info_dict = entries[0]
+ else:
+ info_dict = self._extract_jwplayer_data(
+ webpage, video_id, require_title=False)
+ uploader = self._html_search_regex(
+ r': <a href="http://www\.thisav\.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
+ webpage, 'uploader name', fatal=False)
+ uploader_id = self._html_search_regex(
+ r': <a href="http://www\.thisav\.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
+ webpage, 'uploader id', fatal=False)
+
+ info_dict.update({
+ 'id': video_id,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'title': title,
+ })
+
+ return info_dict
diff --git a/hypervideo_dl/extractor/thisoldhouse.py b/hypervideo_dl/extractor/thisoldhouse.py
new file mode 100644
index 0000000..a3d9b40
--- /dev/null
+++ b/hypervideo_dl/extractor/thisoldhouse.py
@@ -0,0 +1,47 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class ThisOldHouseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/]+/)?\d+)/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
+ 'info_dict': {
+ 'id': '5dcdddf673c3f956ef5db202',
+ 'ext': 'mp4',
+ 'title': 'How to Build a Storage Bench',
+ 'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
+ 'timestamp': 1442548800,
+ 'upload_date': '20150918',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
+ 'only_matching': True,
+ }, {
+ # iframe www.thisoldhouse.com
+ 'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
+ 'only_matching': True,
+ }]
+ _ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ r'<iframe[^>]+src=[\'"](?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})',
+ webpage, 'video id')
+ return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)
diff --git a/hypervideo_dl/extractor/threeqsdn.py b/hypervideo_dl/extractor/threeqsdn.py
new file mode 100644
index 0000000..f6d37bb
--- /dev/null
+++ b/hypervideo_dl/extractor/threeqsdn.py
@@ -0,0 +1,164 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class ThreeQSDNIE(InfoExtractor):
+ IE_NAME = '3qsdn'
+ IE_DESC = '3Q SDN'
+ _VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ # https://player.3qsdn.com/demo.html
+ 'url': 'https://playout.3qsdn.com/7201c779-6b3c-11e7-a40e-002590c750be',
+ 'md5': '64a57396b16fa011b15e0ea60edce918',
+ 'info_dict': {
+ 'id': '7201c779-6b3c-11e7-a40e-002590c750be',
+ 'ext': 'mp4',
+ 'title': 'Video Ads',
+ 'is_live': False,
+ 'description': 'Video Ads Demo',
+ 'timestamp': 1500334803,
+ 'upload_date': '20170717',
+ 'duration': 888.032,
+ 'subtitles': {
+ 'eng': 'count:1',
+ },
+ },
+ 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
+ }, {
+ # live video stream
+ 'url': 'https://playout.3qsdn.com/66e68995-11ca-11e8-9273-002590c750be',
+ 'info_dict': {
+ 'id': '66e68995-11ca-11e8-9273-002590c750be',
+ 'ext': 'mp4',
+ 'title': 're:^66e68995-11ca-11e8-9273-002590c750be [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 downloads
+ },
+ }, {
+ # live audio stream
+ 'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
+ 'only_matching': True,
+ }, {
+ # live audio stream with some 404 URLs
+ 'url': 'http://playout.3qsdn.com/ac5c3186-777a-11e2-9c30-9acf09e2db48',
+ 'only_matching': True,
+ }, {
+ # geo restricted with 'This content is not available in your country'
+ 'url': 'http://playout.3qsdn.com/d63a3ffe-75e8-11e2-9c30-9acf09e2db48',
+ 'only_matching': True,
+ }, {
+ # geo restricted with 'playout.3qsdn.com/forbidden'
+ 'url': 'http://playout.3qsdn.com/8e330f26-6ae2-11e2-a16a-9acf09e2db48',
+ 'only_matching': True,
+ }, {
+ # live video with rtmp link
+ 'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be',
+ 'only_matching': True,
+ }, {
+ # ondemand from http://www.philharmonie.tv/veranstaltung/26/
+ 'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
+ 'only_matching': True,
+ }, {
+ # live video stream
+ 'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % ThreeQSDNIE._VALID_URL, webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ try:
+ config = self._download_json(
+ url.replace('://playout.3qsdn.com/', '://playout.3qsdn.com/config/'), video_id)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ self.raise_geo_restricted()
+ raise
+
+ live = config.get('streamContent') == 'live'
+ aspect = float_or_none(config.get('aspect'))
+
+ formats = []
+ for source_type, source in (config.get('sources') or {}).items():
+ if not source:
+ continue
+ if source_type == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ source, video_id, mpd_id='mpd', fatal=False))
+ elif source_type == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ source, video_id, 'mp4', 'm3u8' if live else 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif source_type == 'progressive':
+ for s in source:
+ src = s.get('src')
+ if not (src and self._is_valid_url(src, video_id)):
+ continue
+ width = None
+ format_id = ['http']
+ ext = determine_ext(src)
+ if ext:
+ format_id.append(ext)
+ height = int_or_none(s.get('height'))
+ if height:
+ format_id.append('%dp' % height)
+                    if height and aspect:
+                        width = int(height * aspect)
+ formats.append({
+ 'ext': ext,
+ 'format_id': '-'.join(format_id),
+ 'height': height,
+ 'source_preference': 0,
+ 'url': src,
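+                        # a height of 0 marks an audio-only rendition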
+ 'vcodec': 'none' if height == 0 else None,
+ 'width': width,
+ })
+ for f in formats:
+ if f.get('acodec') == 'none':
+ f['preference'] = -40
+ elif f.get('vcodec') == 'none':
+ f['preference'] = -50
+ self._sort_formats(formats, ('preference', 'width', 'height', 'source_preference', 'tbr', 'vbr', 'abr', 'ext', 'format_id'))
+
+ subtitles = {}
+ for subtitle in (config.get('subtitles') or []):
+ src = subtitle.get('src')
+ if not src:
+ continue
+ subtitles.setdefault(subtitle.get('label') or 'eng', []).append({
+ 'url': src,
+ })
+
+ title = config.get('title') or video_id
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if live else title,
+ 'thumbnail': config.get('poster') or None,
+ 'description': config.get('description') or None,
+ 'timestamp': parse_iso8601(config.get('upload_date')),
+ 'duration': float_or_none(config.get('vlength')) or None,
+ 'is_live': live,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/tiktok.py b/hypervideo_dl/extractor/tiktok.py
new file mode 100644
index 0000000..4faa6de
--- /dev/null
+++ b/hypervideo_dl/extractor/tiktok.py
@@ -0,0 +1,147 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    str_or_none,
+    try_get,
+    url_or_none,
+)
+
+
+class TikTokBaseIE(InfoExtractor):
+ def _extract_video(self, data, video_id=None):
+ video = data['video']
+ description = str_or_none(try_get(data, lambda x: x['desc']))
+        width = int_or_none(video.get('width'))
+        height = int_or_none(video.get('height'))
+
+ format_urls = set()
+ formats = []
+ for format_id in ('download', 'play'):
+ format_url = url_or_none(video.get('%sAddr' % format_id))
+ if not format_url:
+ continue
+ if format_url in format_urls:
+ continue
+ format_urls.add(format_url)
+ formats.append({
+ 'url': format_url,
+ 'ext': 'mp4',
+ 'height': height,
+ 'width': width,
+ 'http_headers': {
+ 'Referer': 'https://www.tiktok.com/',
+ }
+ })
+ self._sort_formats(formats)
+
+ thumbnail = url_or_none(video.get('cover'))
+ duration = float_or_none(video.get('duration'))
+
+ uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
+ uploader_id = try_get(data, lambda x: x['author']['id'], compat_str)
+
+ timestamp = int_or_none(data.get('createTime'))
+
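+        # counters live under stats.<key>Count; "digg" is TikTok's internal
+        # name for likes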
+ def stats(key):
+ return int_or_none(try_get(
+ data, lambda x: x['stats']['%sCount' % key]))
+
+ view_count = stats('play')
+ like_count = stats('digg')
+ comment_count = stats('comment')
+ repost_count = stats('share')
+
+ aweme_id = data.get('id') or video_id
+
+ return {
+ 'id': aweme_id,
+ 'title': uploader or aweme_id,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'comment_count': comment_count,
+ 'repost_count': repost_count,
+ 'formats': formats,
+ }
+
+
+class TikTokIE(TikTokBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@[^/]+/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.tiktok.com/@zureeal/video/6606727368545406213',
+ 'md5': '163ceff303bb52de60e6887fe399e6cd',
+ 'info_dict': {
+ 'id': '6606727368545406213',
+ 'ext': 'mp4',
+ 'title': 'Zureeal',
+ 'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 15,
+ 'uploader': 'Zureeal',
+ 'uploader_id': '188294915489964032',
+ 'timestamp': 1538248586,
+ 'upload_date': '20180929',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ }
+ }]
+
+ def _real_initialize(self):
+ # Setup session (will set necessary cookies)
+ self._request_webpage(
+ 'https://www.tiktok.com/', None, note='Setting up session')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ page_props = self._parse_json(self._search_regex(
+ r'<script[^>]+\bid=["\']__NEXT_DATA__[^>]+>\s*({.+?})\s*</script',
+ webpage, 'data'), video_id)['props']['pageProps']
+        data = try_get(page_props, lambda x: x['itemInfo']['itemStruct'], dict)
+        if not data:
+            if page_props.get('statusCode') == 10216:
+                raise ExtractorError('This video is private', expected=True)
+            raise ExtractorError('Unable to extract video data')
+        return self._extract_video(data, video_id)
+
+
+class TikTokUserIE(TikTokBaseIE):
+ _VALID_URL = r'https://(?:www\.)?tiktok\.com/@(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.tiktok.com/@zureeal',
+ 'info_dict': {
+ 'id': '188294915489964032',
+ },
+ 'playlist_mincount': 24,
+ }]
+ _WORKING = False
+
+ @classmethod
+ def suitable(cls, url):
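+        # Let TikTokIE handle individual video URLs; this IE matches only profiles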
+ return False if TikTokIE.suitable(url) else super(TikTokUserIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ data = self._download_json(
+ 'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id, user_id,
+ query={'_signature': '_'})
+ entries = []
+ for aweme in data['aweme_list']:
+ try:
+ entry = self._extract_video(aweme)
+ except ExtractorError:
+ continue
+ entry['extractor_key'] = TikTokIE.ie_key()
+ entries.append(entry)
+ return self.playlist_result(entries, user_id)
diff --git a/hypervideo_dl/extractor/tinypic.py b/hypervideo_dl/extractor/tinypic.py
new file mode 100644
index 0000000..bc2def5
--- /dev/null
+++ b/hypervideo_dl/extractor/tinypic.py
@@ -0,0 +1,56 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class TinyPicIE(InfoExtractor):
+ IE_NAME = 'tinypic'
+ IE_DESC = 'tinypic.com videos'
+ _VALID_URL = r'https?://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
+
+ _TESTS = [
+ {
+ 'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
+ 'md5': '609b74432465364e72727ebc6203f044',
+ 'info_dict': {
+ 'id': '6xw7tc',
+ 'ext': 'flv',
+ 'title': 'shadow phenomenon weird',
+ },
+ },
+ {
+ 'url': 'http://de.tinypic.com/player.php?v=dy90yh&s=8',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id, 'Downloading page')
+
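+        # The Flash player receives the file and server ids via addVariable() calls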
+ mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
+ r'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
+ if mobj is None:
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ file_id = mobj.group('fileid')
+ server_id = mobj.group('serverid')
+
+ KEYWORDS_SUFFIX = ', Video, images, photos, videos, myspace, ebay, video hosting, photo hosting'
+        keywords = self._html_search_meta('keywords', webpage, 'keywords')
+ title = keywords[:-len(KEYWORDS_SUFFIX)] if keywords.endswith(KEYWORDS_SUFFIX) else ''
+
+ video_url = 'http://v%s.tinypic.com/%s.flv' % (server_id, file_id)
+ thumbnail = 'http://v%s.tinypic.com/%s_th.jpg' % (server_id, file_id)
+
+ return {
+ 'id': file_id,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ 'title': title
+ }
diff --git a/hypervideo_dl/extractor/tmz.py b/hypervideo_dl/extractor/tmz.py
new file mode 100644
index 0000000..3d1bf75
--- /dev/null
+++ b/hypervideo_dl/extractor/tmz.py
@@ -0,0 +1,111 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+from .kaltura import KalturaIE
+from ..utils import (
+ int_or_none,
+ unified_timestamp,
+)
+
+
+class TMZIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.tmz.com/videos/0-cegprt2p/',
+ 'md5': '31f9223e20eef55954973359afa61a20',
+ 'info_dict': {
+ 'id': 'P6YjLBLk',
+ 'ext': 'mp4',
+ 'title': "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet",
+ 'description': 'md5:b714359fc18607715ebccbd2da8ff488',
+ 'timestamp': 1467831837,
+ 'upload_date': '20160706',
+ },
+ 'add_ie': [JWPlatformIE.ie_key()],
+ }, {
+ 'url': 'http://www.tmz.com/videos/0_okj015ty/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tmz.com/videos/2021-02-19-021921-floyd-mayweather-1043872/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).replace('-', '_')
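+        # TMZ URLs use dashes where the corresponding Kaltura entry ids use underscores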
+
+ webpage = self._download_webpage(url, video_id, fatal=False)
+ if webpage:
+ tmz_video_id = self._search_regex(
+ r'nodeRef\s*:\s*["\']tmz:video:([\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12})',
+ webpage, 'video id', default=None)
+            video = self._download_json(
+                'https://www.tmz.com/_/video/%s' % tmz_video_id, video_id,
+                fatal=False) if tmz_video_id else None
+ if video:
+ message = video['message']
+ info = {
+ '_type': 'url_transparent',
+ 'title': message.get('title'),
+ 'description': message.get('description'),
+ 'timestamp': unified_timestamp(message.get('published_at')),
+ 'duration': int_or_none(message.get('duration')),
+ }
+ jwplatform_id = message.get('jwplayer_media_id')
+ if jwplatform_id:
+ info.update({
+ 'url': 'jwplatform:%s' % jwplatform_id,
+ 'ie_key': JWPlatformIE.ie_key(),
+ })
+ else:
+ kaltura_entry_id = message.get('kaltura_entry_id') or video_id
+ kaltura_partner_id = message.get('kaltura_partner_id') or '591531'
+ info.update({
+ 'url': 'kaltura:%s:%s' % (kaltura_partner_id, kaltura_entry_id),
+ 'ie_key': KalturaIE.ie_key(),
+ })
+ return info
+
+ return self.url_result(
+ 'kaltura:591531:%s' % video_id, KalturaIE.ie_key(), video_id)
+
+
+class TMZArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
+ 'info_dict': {
+ 'id': 'PAKZa97W',
+ 'ext': 'mp4',
+ 'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
+ 'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
+ 'timestamp': 1429466400,
+ 'upload_date': '20150419',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [JWPlatformIE.ie_key()],
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+        tmz_url = self._search_regex(
+            r'clickLink\s*\(\s*["\'](?P<url>%s)' % TMZIE._VALID_URL, webpage,
+            'tmz url', default=None, group='url')
+ if tmz_url:
+ return self.url_result(tmz_url, ie=TMZIE.ie_key())
+
+ embedded_video_info = self._parse_json(self._html_search_regex(
+ r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'),
+ video_id)
+ return self.url_result(
+ 'http://www.tmz.com/videos/%s/' % embedded_video_info['id'],
+ ie=TMZIE.ie_key())
diff --git a/hypervideo_dl/extractor/tnaflix.py b/hypervideo_dl/extractor/tnaflix.py
new file mode 100644
index 0000000..b3573c6
--- /dev/null
+++ b/hypervideo_dl/extractor/tnaflix.py
@@ -0,0 +1,327 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ fix_xml_ampersands,
+ float_or_none,
+ int_or_none,
+ parse_duration,
+ str_to_int,
+ unescapeHTML,
+ xpath_text,
+)
+
+
+class TNAFlixNetworkBaseIE(InfoExtractor):
+ # May be overridden in descendants if necessary
+ _CONFIG_REGEX = [
+ r'flashvars\.config\s*=\s*escape\("(?P<url>[^"]+)"',
+ r'<input[^>]+name="config\d?" value="(?P<url>[^"]+)"',
+ r'config\s*=\s*(["\'])(?P<url>(?:https?:)?//(?:(?!\1).)+)\1',
+ ]
+ _HOST = 'tna'
+ _VKEY_SUFFIX = ''
+ _TITLE_REGEX = r'<input[^>]+name="title" value="([^"]+)"'
+ _DESCRIPTION_REGEX = r'<input[^>]+name="description" value="([^"]+)"'
+ _UPLOADER_REGEX = r'<input[^>]+name="username" value="([^"]+)"'
+ _VIEW_COUNT_REGEX = None
+ _COMMENT_COUNT_REGEX = None
+ _AVERAGE_RATING_REGEX = None
+ _CATEGORIES_REGEX = r'<li[^>]*>\s*<span[^>]+class="infoTitle"[^>]*>Categories:</span>\s*<span[^>]+class="listView"[^>]*>(.+?)</span>\s*</li>'
+
+ def _extract_thumbnails(self, flix_xml):
+
+ def get_child(elem, names):
+ for name in names:
+ child = elem.find(name)
+ if child is not None:
+ return child
+
+ timeline = get_child(flix_xml, ['timeline', 'rolloverBarImage'])
+ if timeline is None:
+ return
+
+ pattern_el = get_child(timeline, ['imagePattern', 'pattern'])
+ if pattern_el is None or not pattern_el.text:
+ return
+
+ first_el = get_child(timeline, ['imageFirst', 'first'])
+ last_el = get_child(timeline, ['imageLast', 'last'])
+ if first_el is None or last_el is None:
+ return
+
+ first_text = first_el.text
+ last_text = last_el.text
+ if not first_text.isdigit() or not last_text.isdigit():
+ return
+
+ first = int(first_text)
+ last = int(last_text)
+ if first > last:
+ return
+
+ width = int_or_none(xpath_text(timeline, './imageWidth', 'thumbnail width'))
+ height = int_or_none(xpath_text(timeline, './imageHeight', 'thumbnail height'))
+
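+        # Each thumbnail URL is built by substituting the frame index for the
+        # '#' placeholder, e.g. .../thumb#.jpg -> .../thumb1.jpg, .../thumb2.jpg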
+ return [{
+ 'url': self._proto_relative_url(pattern_el.text.replace('#', compat_str(i)), 'http:'),
+ 'width': width,
+ 'height': height,
+ } for i in range(first, last + 1)]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ for display_id_key in ('display_id', 'display_id_2'):
+ if display_id_key in mobj.groupdict():
+ display_id = mobj.group(display_id_key)
+ if display_id:
+ break
+ else:
+ display_id = video_id
+
+ webpage = self._download_webpage(url, display_id)
+
+ cfg_url = self._proto_relative_url(self._html_search_regex(
+ self._CONFIG_REGEX, webpage, 'flashvars.config', default=None,
+ group='url'), 'http:')
+
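+        # Fall back to composing the config URL from the page's hidden form inputs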
+ if not cfg_url:
+ inputs = self._hidden_inputs(webpage)
+ cfg_url = ('https://cdn-fck.%sflix.com/%sflix/%s%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha'
+ % (self._HOST, self._HOST, inputs['vkey'], self._VKEY_SUFFIX, inputs['nkey'], video_id))
+
+ cfg_xml = self._download_xml(
+ cfg_url, display_id, 'Downloading metadata',
+ transform_source=fix_xml_ampersands, headers={'Referer': url})
+
+ formats = []
+
+ def extract_video_url(vl):
+ # Any URL modification now results in HTTP Error 403: Forbidden
+ return unescapeHTML(vl.text)
+
+ video_link = cfg_xml.find('./videoLink')
+ if video_link is not None:
+ formats.append({
+ 'url': extract_video_url(video_link),
+ 'ext': xpath_text(cfg_xml, './videoConfig/type', 'type', default='flv'),
+ })
+
+ for item in cfg_xml.findall('./quality/item'):
+ video_link = item.find('./videoLink')
+ if video_link is None:
+ continue
+ res = item.find('res')
+ format_id = None if res is None else res.text
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]', format_id, 'height', default=None))
+ formats.append({
+ 'url': self._proto_relative_url(extract_video_url(video_link), 'http:'),
+ 'format_id': format_id,
+ 'height': height,
+ })
+
+ self._sort_formats(formats)
+
+ thumbnail = self._proto_relative_url(
+ xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:')
+ thumbnails = self._extract_thumbnails(cfg_xml)
+
+ title = None
+ if self._TITLE_REGEX:
+ title = self._html_search_regex(
+ self._TITLE_REGEX, webpage, 'title', default=None)
+ if not title:
+ title = self._og_search_title(webpage)
+
+ age_limit = self._rta_search(webpage) or 18
+
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration', default=None))
+
+ def extract_field(pattern, name):
+ return self._html_search_regex(pattern, webpage, name, default=None) if pattern else None
+
+ description = extract_field(self._DESCRIPTION_REGEX, 'description')
+ uploader = extract_field(self._UPLOADER_REGEX, 'uploader')
+ view_count = str_to_int(extract_field(self._VIEW_COUNT_REGEX, 'view count'))
+ comment_count = str_to_int(extract_field(self._COMMENT_COUNT_REGEX, 'comment count'))
+ average_rating = float_or_none(extract_field(self._AVERAGE_RATING_REGEX, 'average rating'))
+
+ categories_str = extract_field(self._CATEGORIES_REGEX, 'categories')
+ categories = [c.strip() for c in categories_str.split(',')] if categories_str is not None else []
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'uploader': uploader,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'average_rating': average_rating,
+ 'categories': categories,
+ 'formats': formats,
+ }
+
+
+class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
+ _VALID_URL = r'https?://player\.(?:tna|emp)flix\.com/video/(?P<id>\d+)'
+
+ _TITLE_REGEX = r'<title>([^<]+)</title>'
+
+ _TESTS = [{
+ 'url': 'https://player.tnaflix.com/video/6538',
+ 'info_dict': {
+ 'id': '6538',
+ 'display_id': '6538',
+ 'ext': 'mp4',
+ 'title': 'Educational xxx video',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://player.empflix.com/video/33051',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [url for _, url in re.findall(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.(?:tna|emp)flix\.com/video/\d+)\1',
+ webpage)]
+
+
+class TNAEMPFlixBaseIE(TNAFlixNetworkBaseIE):
+ _DESCRIPTION_REGEX = r'(?s)>Description:</[^>]+>(.+?)<'
+ _UPLOADER_REGEX = r'<span>by\s*<a[^>]+\bhref=["\']/profile/[^>]+>([^<]+)<'
+ _CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
+
+
+class TNAFlixIE(TNAEMPFlixBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
+
+ _TITLE_REGEX = r'<title>(.+?) - (?:TNAFlix Porn Videos|TNAFlix\.com)</title>'
+
+ _TESTS = [{
+ # anonymous uploader, no categories
+ 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
+ 'md5': '7e569419fe6d69543d01e6be22f5f7c4',
+ 'info_dict': {
+ 'id': '553878',
+ 'display_id': 'Carmella-Decesare-striptease',
+ 'ext': 'mp4',
+ 'title': 'Carmella Decesare - striptease',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'duration': 91,
+ 'age_limit': 18,
+ 'categories': ['Porn Stars'],
+ }
+ }, {
+ # non-anonymous uploader, categories
+ 'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538',
+ 'md5': '0f5d4d490dbfd117b8607054248a07c0',
+ 'info_dict': {
+ 'id': '6538',
+ 'display_id': 'Educational-xxx-video',
+ 'ext': 'mp4',
+ 'title': 'Educational xxx video',
+ 'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'duration': 164,
+ 'age_limit': 18,
+ 'uploader': 'bobwhite39',
+ 'categories': list,
+ }
+ }, {
+ 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
+ 'only_matching': True,
+ }]
+
+
+class EMPFlixIE(TNAEMPFlixBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?empflix\.com/(?:videos/(?P<display_id>.+?)-|[^/]+/(?P<display_id_2>[^/]+)/video)(?P<id>[0-9]+)'
+
+ _HOST = 'emp'
+ _VKEY_SUFFIX = '-1'
+
+ _TESTS = [{
+ 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
+ 'md5': 'bc30d48b91a7179448a0bda465114676',
+ 'info_dict': {
+ 'id': '33051',
+ 'display_id': 'Amateur-Finger-Fuck',
+ 'ext': 'mp4',
+ 'title': 'Amateur Finger Fuck',
+ 'description': 'Amateur solo finger fucking.',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'duration': 83,
+ 'age_limit': 18,
+ 'uploader': 'cwbike',
+ 'categories': ['Amateur', 'Anal', 'Fisting', 'Home made', 'Solo'],
+ }
+ }, {
+ 'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.empflix.com/amateur-porn/Amateur-Finger-Fuck/video33051',
+ 'only_matching': True,
+ }]
+
+
+class MovieFapIE(TNAFlixNetworkBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?moviefap\.com/videos/(?P<id>[0-9a-f]+)/(?P<display_id>[^/]+)\.html'
+
+ _VIEW_COUNT_REGEX = r'<br>Views\s*<strong>([\d,.]+)</strong>'
+ _COMMENT_COUNT_REGEX = r'<span[^>]+id="comCount"[^>]*>([\d,.]+)</span>'
+ _AVERAGE_RATING_REGEX = r'Current Rating\s*<br>\s*<strong>([\d.]+)</strong>'
+ _CATEGORIES_REGEX = r'(?s)<div[^>]+id="vid_info"[^>]*>\s*<div[^>]*>.+?</div>(.*?)<br>'
+
+ _TESTS = [{
+ # normal, multi-format video
+ 'url': 'http://www.moviefap.com/videos/be9867c9416c19f54a4a/experienced-milf-amazing-handjob.html',
+ 'md5': '26624b4e2523051b550067d547615906',
+ 'info_dict': {
+ 'id': 'be9867c9416c19f54a4a',
+ 'display_id': 'experienced-milf-amazing-handjob',
+ 'ext': 'mp4',
+ 'title': 'Experienced MILF Amazing Handjob',
+ 'description': 'Experienced MILF giving an Amazing Handjob',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ 'uploader': 'darvinfred06',
+ 'view_count': int,
+ 'comment_count': int,
+ 'average_rating': float,
+ 'categories': ['Amateur', 'Masturbation', 'Mature', 'Flashing'],
+ }
+ }, {
+ # quirky single-format case where the extension is given as fid, but the video is really an flv
+ 'url': 'http://www.moviefap.com/videos/e5da0d3edce5404418f5/jeune-couple-russe.html',
+ 'md5': 'fa56683e291fc80635907168a743c9ad',
+ 'info_dict': {
+ 'id': 'e5da0d3edce5404418f5',
+ 'display_id': 'jeune-couple-russe',
+ 'ext': 'flv',
+ 'title': 'Jeune Couple Russe',
+ 'description': 'Amateur',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ 'uploader': 'whiskeyjar',
+ 'view_count': int,
+ 'comment_count': int,
+ 'average_rating': float,
+ 'categories': ['Amateur', 'Teen'],
+ }
+ }]
diff --git a/hypervideo_dl/extractor/toggle.py b/hypervideo_dl/extractor/toggle.py
new file mode 100644
index 0000000..270c84d
--- /dev/null
+++ b/hypervideo_dl/extractor/toggle.py
@@ -0,0 +1,234 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ strip_or_none,
+)
+
+
+class ToggleIE(InfoExtractor):
+ IE_NAME = 'toggle'
+ _VALID_URL = r'(?:https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}|toggle:)(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.mewatch.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
+ 'info_dict': {
+ 'id': '343115',
+ 'ext': 'mp4',
+ 'title': 'Lion Moms Premiere',
+ 'description': 'md5:aea1149404bff4d7f7b6da11fafd8e6b',
+ 'upload_date': '20150910',
+ 'timestamp': 1441858274,
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ }
+ }, {
+ 'note': 'DRM-protected video',
+ 'url': 'http://www.mewatch.sg/en/movies/dug-s-special-mission/341413',
+ 'info_dict': {
+ 'id': '341413',
+ 'ext': 'wvm',
+ 'title': 'Dug\'s Special Mission',
+ 'description': 'md5:e86c6f4458214905c1772398fabc93e0',
+ 'upload_date': '20150827',
+ 'timestamp': 1440644006,
+ },
+ 'params': {
+ 'skip_download': 'DRM-protected wvm download',
+ }
+ }, {
+ # this also tests correct video id extraction
+ 'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
+ 'url': 'http://www.mewatch.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
+ 'info_dict': {
+ 'id': '332861',
+ 'ext': 'mp4',
+ 'title': '28th SEA Games (5 Show) - Episode 11',
+ 'description': 'md5:3cd4f5f56c7c3b1340c50a863f896faa',
+ 'upload_date': '20150605',
+ 'timestamp': 1433480166,
+ },
+ 'params': {
+            'skip_download': 'm3u8 download',
+ },
+ 'skip': 'm3u8 links are geo-restricted'
+ }, {
+ 'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/zh/series/zero-calling-s2-hd/ep13/336367',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/en/movies/seven-days/321936',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mewatch.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/en/channels/eleven-plus/401585',
+ 'only_matching': True,
+ }]
+
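+    # Fixed API credentials used by the web client for the tvinci (tvpapi) gateway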
+ _API_USER = 'tvpapi_147'
+ _API_PASS = '11111'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ params = {
+ 'initObj': {
+ 'Locale': {
+ 'LocaleLanguage': '',
+ 'LocaleCountry': '',
+ 'LocaleDevice': '',
+ 'LocaleUserState': 0
+ },
+ 'Platform': 0,
+ 'SiteGuid': 0,
+ 'DomainID': '0',
+ 'UDID': '',
+ 'ApiUser': self._API_USER,
+ 'ApiPass': self._API_PASS
+ },
+ 'MediaID': video_id,
+ 'mediaType': 0,
+ }
+
+ info = self._download_json(
+ 'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo',
+ video_id, 'Downloading video info json', data=json.dumps(params).encode('utf-8'))
+
+ title = info['MediaName']
+
+ formats = []
+ for video_file in info.get('Files', []):
+ video_url, vid_format = video_file.get('URL'), video_file.get('Format')
+ if not video_url or video_url == 'NA' or not vid_format:
+ continue
+ ext = determine_ext(video_url)
+ vid_format = vid_format.replace(' ', '')
+ # if geo-restricted, m3u8 is inaccessible, but mp4 is okay
+ if ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ video_url, video_id, ext='mp4', m3u8_id=vid_format,
+ note='Downloading %s m3u8 information' % vid_format,
+ errnote='Failed to download %s m3u8 information' % vid_format,
+ fatal=False)
+ for f in m3u8_formats:
+ # Apple FairPlay Streaming
+ if '/fpshls/' in f['url']:
+ continue
+ formats.append(f)
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id=vid_format,
+ note='Downloading %s MPD manifest' % vid_format,
+ errnote='Failed to download %s MPD manifest' % vid_format,
+ fatal=False))
+ elif ext == 'ism':
+ formats.extend(self._extract_ism_formats(
+ video_url, video_id, ism_id=vid_format,
+ note='Downloading %s ISM manifest' % vid_format,
+ errnote='Failed to download %s ISM manifest' % vid_format,
+ fatal=False))
+ elif ext == 'mp4':
+ formats.append({
+ 'ext': ext,
+ 'url': video_url,
+ 'format_id': vid_format,
+ })
+ if not formats:
+ for meta in (info.get('Metas') or []):
+ if meta.get('Key') == 'Encryption' and meta.get('Value') == '1':
+ raise ExtractorError(
+ 'This video is DRM protected.', expected=True)
+ # Most likely because geo-blocked
+ raise ExtractorError('No downloadable videos found', expected=True)
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for picture in info.get('Pictures', []):
+ if not isinstance(picture, dict):
+ continue
+ pic_url = picture.get('URL')
+ if not pic_url:
+ continue
+ thumbnail = {
+ 'url': pic_url,
+ }
+ pic_size = picture.get('PicSize', '')
+ m = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', pic_size)
+ if m:
+ thumbnail.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+ thumbnails.append(thumbnail)
+
+ def counter(prefix):
+ return int_or_none(
+ info.get(prefix + 'Counter') or info.get(prefix.lower() + '_counter'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(info.get('Description')),
+ 'duration': int_or_none(info.get('Duration')),
+ 'timestamp': parse_iso8601(info.get('CreationDate') or None),
+ 'average_rating': float_or_none(info.get('Rating')),
+ 'view_count': counter('View'),
+ 'like_count': counter('Like'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
+
+
+class MeWatchIE(InfoExtractor):
+ IE_NAME = 'mewatch'
+ _VALID_URL = r'https?://(?:(?:www|live)\.)?mewatch\.sg/watch/[^/?#&]+-(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371',
+ 'info_dict': {
+ 'id': '1008625',
+ 'ext': 'mp4',
+ 'title': 'Recipe Of Life 味之道',
+ 'timestamp': 1603306526,
+ 'description': 'md5:6e88cde8af2068444fc8e1bc3ebf257c',
+ 'upload_date': '20201021',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ }, {
+ 'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-搜密。打卡。小红点-S2-E1-176232',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-%E6%90%9C%E5%AF%86%E3%80%82%E6%89%93%E5%8D%A1%E3%80%82%E5%B0%8F%E7%BA%A2%E7%82%B9-S2-E1-176232',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://live.mewatch.sg/watch/Recipe-Of-Life-E41-189759',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ item_id = self._match_id(url)
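+        # Map the public item id to Toggle's internal custom id first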
+ custom_id = self._download_json(
+ 'https://cdn.mewatch.sg/api/items/' + item_id,
+ item_id, query={'segments': 'all'})['customId']
+ return self.url_result(
+ 'toggle:' + custom_id, ToggleIE.ie_key(), custom_id)
diff --git a/hypervideo_dl/extractor/tonline.py b/hypervideo_dl/extractor/tonline.py
new file mode 100644
index 0000000..cc11eae
--- /dev/null
+++ b/hypervideo_dl/extractor/tonline.py
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class TOnlineIE(InfoExtractor):
+ IE_NAME = 't-online.de'
+ _VALID_URL = r'https?://(?:www\.)?t-online\.de/tv/(?:[^/]+/)*id_(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.t-online.de/tv/sport/fussball/id_79166266/drittes-remis-zidane-es-muss-etwas-passieren-.html',
+ 'md5': '7d94dbdde5f9d77c5accc73c39632c29',
+ 'info_dict': {
+ 'id': '79166266',
+ 'ext': 'mp4',
+ 'title': 'Drittes Remis! Zidane: "Es muss etwas passieren"',
+ 'description': 'Es läuft nicht rund bei Real Madrid. Das 1:1 gegen den SD Eibar war das dritte Unentschieden in Folge in der Liga.',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'http://www.t-online.de/tv/id_%s/tid_json_video' % video_id, video_id)
+ title = video_data['subtitle']
+
+ formats = []
+ for asset in video_data.get('assets', []):
+ asset_source = asset.get('source') or asset.get('source2')
+ if not asset_source:
+ continue
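+            # Compose the format id from the asset's type and profile fields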
+ formats_id = []
+ for field_key in ('type', 'profile'):
+ field_value = asset.get(field_key)
+ if field_value:
+ formats_id.append(field_value)
+ formats.append({
+ 'format_id': '-'.join(formats_id),
+ 'url': asset_source,
+ })
+
+ thumbnails = []
+ for image in video_data.get('images', []):
+ image_source = image.get('source')
+ if not image_source:
+ continue
+ thumbnails.append({
+ 'url': image_source,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/toongoggles.py b/hypervideo_dl/extractor/toongoggles.py
new file mode 100644
index 0000000..b5ba1c0
--- /dev/null
+++ b/hypervideo_dl/extractor/toongoggles.py
@@ -0,0 +1,81 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+)
+
+
+class ToonGogglesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?toongoggles\.com/shows/(?P<show_id>\d+)(?:/[^/]+/episodes/(?P<episode_id>\d+))?'
+ _TESTS = [{
+ 'url': 'http://www.toongoggles.com/shows/217143/bernard-season-2/episodes/217147/football',
+ 'md5': '18289fc2b951eff6b953a9d8f01e6831',
+ 'info_dict': {
+ 'id': '217147',
+ 'ext': 'mp4',
+ 'title': 'Football',
+ 'uploader_id': '1',
+ 'description': 'Bernard decides to play football in order to be better than Lloyd and tries to beat him no matter how, he even cheats.',
+ 'upload_date': '20160718',
+ 'timestamp': 1468879330,
+ }
+ }, {
+ 'url': 'http://www.toongoggles.com/shows/227759/om-nom-stories-around-the-world',
+ 'info_dict': {
+ 'id': '227759',
+ 'title': 'Om Nom Stories Around The World',
+ },
+ 'playlist_mincount': 11,
+ }]
+
+ def _call_api(self, action, page_id, query):
+ query.update({
+ 'for_ng': 1,
+ 'for_web': 1,
+ 'show_meta': 1,
+ 'version': 7.0,
+ })
+ return self._download_json('http://api.toongoggles.com/' + action, page_id, query=query)
+
+ def _parse_episode_data(self, episode_data):
+ title = episode_data['episode_name']
+
+ return {
+ '_type': 'url_transparent',
+ 'id': episode_data['episode_id'],
+ 'title': title,
+ 'url': 'kaltura:513551:' + episode_data['entry_id'],
+ 'thumbnail': episode_data.get('thumbnail_url'),
+ 'description': episode_data.get('description'),
+ 'duration': parse_duration(episode_data.get('hms')),
+ 'series': episode_data.get('show_name'),
+ 'season_number': int_or_none(episode_data.get('season_num')),
+ 'episode_id': episode_data.get('episode_id'),
+ 'episode': title,
+ 'episode_number': int_or_none(episode_data.get('episode_num')),
+ 'categories': episode_data.get('categories'),
+ 'ie_key': 'Kaltura',
+ }
+
+ def _real_extract(self, url):
+ show_id, episode_id = re.match(self._VALID_URL, url).groups()
+ if episode_id:
+ episode_data = self._call_api('search', episode_id, {
+ 'filter': 'episode',
+ 'id': episode_id,
+ })['objects'][0]
+ return self._parse_episode_data(episode_data)
+ else:
+ show_data = self._call_api('getepisodesbyshow', show_id, {
+ 'max': 1000000000,
+ 'showid': show_id,
+ })
+ entries = []
+ for episode_data in show_data.get('objects', []):
+ entries.append(self._parse_episode_data(episode_data))
+ return self.playlist_result(entries, show_id, show_data.get('show_name'))
diff --git a/hypervideo_dl/extractor/toutv.py b/hypervideo_dl/extractor/toutv.py
new file mode 100644
index 0000000..44b022f
--- /dev/null
+++ b/hypervideo_dl/extractor/toutv.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .radiocanada import RadioCanadaIE
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+)
+
+
+class TouTvIE(RadioCanadaIE):
+ _NETRC_MACHINE = 'toutv'
+ IE_NAME = 'tou.tv'
+ _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
+
+ _TESTS = [{
+ 'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
+ 'info_dict': {
+ 'id': '122017',
+ 'ext': 'mp4',
+ 'title': 'Saison 2015 Épisode 17',
+ 'description': 'La photo de famille 2',
+ 'upload_date': '20100717',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ }, {
+ 'url': 'http://ici.tou.tv/hackers',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://ici.tou.tv/l-age-adulte/S01C501',
+ 'only_matching': True,
+ }]
+ _CLIENT_KEY = '90505c8d-9c34-4f34-8da1-3a85bdc6d4f4'
+
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+ try:
+ self._access_token = self._download_json(
+ 'https://services.radio-canada.ca/toutv/profiling/accounts/login',
+ None, 'Logging in', data=json.dumps({
+ 'ClientId': self._CLIENT_KEY,
+ 'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20',
+ 'Email': email,
+ 'Password': password,
+ 'Scope': 'id.write media-validation.read',
+ }).encode(), headers={
+ 'Authorization': 'client-key ' + self._CLIENT_KEY,
+ 'Content-Type': 'application/json;charset=utf-8',
+ })['access_token']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read().decode(), None)['Message']
+ raise ExtractorError(error, expected=True)
+ raise
+ self._claims = self._call_api('validation/v2/getClaims')['claims']
+
+ def _real_extract(self, url):
+ path = self._match_id(url)
+ metadata = self._download_json(
+ 'https://services.radio-canada.ca/toutv/presentation/%s' % path, path, query={
+ 'client_key': self._CLIENT_KEY,
+ 'device': 'web',
+ 'version': 4,
+ })
+ # IsDrm does not necessarily mean the video is DRM protected (see
+ # https://github.com/ytdl-org/youtube-dl/issues/13994).
+ if metadata.get('IsDrm'):
+ self.report_warning('This video is probably DRM protected.', path)
+ video_id = metadata['IdMedia']
+ details = metadata['Details']
+
+ return merge_dicts({
+ 'id': video_id,
+ 'title': details.get('OriginalTitle'),
+ 'description': details.get('Description'),
+ 'thumbnail': details.get('ImageUrl'),
+ 'duration': int_or_none(details.get('LengthInSeconds')),
+ 'series': metadata.get('ProgramTitle'),
+ 'season_number': int_or_none(metadata.get('SeasonNumber')),
+ 'season': metadata.get('SeasonTitle'),
+ 'episode_number': int_or_none(metadata.get('EpisodeNumber')),
+ 'episode': metadata.get('EpisodeTitle'),
+ }, self._extract_info(metadata.get('AppCode', 'toutv'), video_id))
diff --git a/hypervideo_dl/extractor/toypics.py b/hypervideo_dl/extractor/toypics.py
new file mode 100644
index 0000000..f705a06
--- /dev/null
+++ b/hypervideo_dl/extractor/toypics.py
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class ToypicsIE(InfoExtractor):
+ IE_DESC = 'Toypics video'
+ _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
+ 'md5': '16e806ad6d6f58079d210fe30985e08b',
+ 'info_dict': {
+ 'id': '514',
+ 'ext': 'mp4',
+ 'title': "Chance-Bulge'd, 2",
+ 'age_limit': 18,
+ 'uploader': 'kidsune',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ formats = self._parse_html5_media_entries(
+ url, webpage, video_id)[0]['formats']
+ title = self._html_search_regex([
+ r'<h1[^>]+class=["\']view-video-title[^>]+>([^<]+)</h',
+ r'<title>([^<]+) - Toypics</title>',
+ ], webpage, 'title')
+
+ uploader = self._html_search_regex(
+ r'More videos from <strong>([^<]+)</strong>', webpage, 'uploader',
+ fatal=False)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'uploader': uploader,
+ 'age_limit': 18,
+ }
+
+
+class ToypicsUserIE(InfoExtractor):
+ IE_DESC = 'Toypics user profile'
+ _VALID_URL = r'https?://videos\.toypics\.net/(?!view)(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://videos.toypics.net/Mikey',
+ 'info_dict': {
+ 'id': 'Mikey',
+ },
+ 'playlist_mincount': 19,
+ }
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+
+ profile_page = self._download_webpage(
+ url, username, note='Retrieving profile page')
+
+ video_count = int(self._search_regex(
+ r'public/">Public Videos \(([0-9]+)\)</a></li>', profile_page,
+ 'video count'))
+
+ PAGE_SIZE = 8
+ urls = []
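+        # Ceiling division: full pages plus one final partial page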
+        page_count = (video_count + PAGE_SIZE - 1) // PAGE_SIZE
+ for n in range(1, page_count + 1):
+ lpage_url = url + '/public/%d' % n
+ lpage = self._download_webpage(
+ lpage_url, username,
+ note='Downloading page %d/%d' % (n, page_count))
+ urls.extend(
+ re.findall(
+ r'<div[^>]+class=["\']preview[^>]+>\s*<a[^>]+href="(https?://videos\.toypics\.net/view/[^"]+)"',
+ lpage))
+
+ return {
+ '_type': 'playlist',
+ 'id': username,
+ 'entries': [{
+ '_type': 'url',
+ 'url': eurl,
+ 'ie_key': 'Toypics',
+ } for eurl in urls]
+ }
diff --git a/hypervideo_dl/extractor/traileraddict.py b/hypervideo_dl/extractor/traileraddict.py
new file mode 100644
index 0000000..747370d
--- /dev/null
+++ b/hypervideo_dl/extractor/traileraddict.py
@@ -0,0 +1,64 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class TrailerAddictIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'(?:https?://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
+ _TEST = {
+ 'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
+ 'md5': '41365557f3c8c397d091da510e73ceb4',
+ 'info_dict': {
+ 'id': '76184',
+ 'ext': 'mp4',
+ 'title': 'Prince Avalanche Trailer',
+ 'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ name = mobj.group('movie') + '/' + mobj.group('trailer_name')
+ webpage = self._download_webpage(url, name)
+
+ title = self._search_regex(r'<title>(.+?)</title>',
+ webpage, 'video title').replace(' - Trailer Addict', '')
+ view_count_str = self._search_regex(
+ r'<span class="views_n">([0-9,.]+)</span>',
+ webpage, 'view count', fatal=False)
+ view_count = (
+ None if view_count_str is None
+ else int(view_count_str.replace(',', '')))
+ video_id = self._search_regex(
+ r'<param\s+name="movie"\s+value="/emb/([0-9]+)"\s*/>',
+ webpage, 'video id')
+
+ # Presence of (no)watchplus function indicates HD quality is available
+        if re.search(r'function\s+(?:no)?watchplus\(\)', webpage):
+ fvar = 'fvarhd'
+ else:
+ fvar = 'fvar'
+
+        info_url = 'http://www.traileraddict.com/%s.php?tid=%s' % (fvar, video_id)
+ info_webpage = self._download_webpage(info_url, video_id, 'Downloading the info webpage')
+
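+        # The download URL arrives percent-encoded; restore the query separator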
+ final_url = self._search_regex(r'&fileurl=(.+)',
+ info_webpage, 'Download url').replace('%3F', '?')
+ thumbnail_url = self._search_regex(r'&image=(.+?)&',
+ info_webpage, 'thumbnail url')
+
+ description = self._html_search_regex(
+ r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
+ webpage, 'description', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'url': final_url,
+ 'title': title,
+ 'thumbnail': thumbnail_url,
+ 'description': description,
+ 'view_count': view_count,
+ }
diff --git a/hypervideo_dl/extractor/trilulilu.py b/hypervideo_dl/extractor/trilulilu.py
new file mode 100644
index 0000000..a800449
--- /dev/null
+++ b/hypervideo_dl/extractor/trilulilu.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class TriluliluIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?trilulilu\.ro/(?:[^/]+/)?(?P<id>[^/#\?]+)'
+ _TESTS = [{
+ 'url': 'http://www.trilulilu.ro/big-buck-bunny-1',
+ 'md5': '68da087b676a6196a413549212f60cc6',
+ 'info_dict': {
+ 'id': 'ae2899e124140b',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny',
+ 'description': ':) pentru copilul din noi',
+ 'uploader_id': 'chipy',
+ 'upload_date': '20120304',
+ 'timestamp': 1330830647,
+ 'uploader': 'chipy',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ }, {
+ 'url': 'http://www.trilulilu.ro/adena-ft-morreti-inocenta',
+ 'md5': '929dfb8729dc71750463af88bbbbf4a4',
+ 'info_dict': {
+ 'id': 'f299710e3c91c5',
+ 'ext': 'mp4',
+ 'title': 'Adena ft. Morreti - Inocenta',
+ 'description': 'pop music',
+ 'uploader_id': 'VEVOmixt',
+ 'upload_date': '20151204',
+ 'uploader': 'VEVOmixt',
+ 'timestamp': 1449187937,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ media_info = self._download_json('http://m.trilulilu.ro/%s?format=json' % display_id, display_id)
+
+ age_limit = 0
+ errors = media_info.get('errors', {})
+ if errors.get('friends'):
+ raise ExtractorError('This video is private.', expected=True)
+ elif errors.get('geoblock'):
+ raise ExtractorError('This video is not available in your country.', expected=True)
+ elif errors.get('xxx_unlogged'):
+ age_limit = 18
+
+ media_class = media_info.get('class')
+ if media_class not in ('video', 'audio'):
+ raise ExtractorError('not a video or an audio')
+
+ user = media_info.get('user', {})
+
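+        # cover_url carries {width} and {height} template placeholders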
+        thumbnail = media_info.get('cover_url')
+        if thumbnail:
+            thumbnail = thumbnail.format(width='1600', height='1200')
+
+ # TODO: get correct ext for audio files
+ stream_type = media_info.get('stream_type')
+ formats = [{
+ 'url': media_info['href'],
+ 'ext': stream_type,
+ }]
+ if media_info.get('is_hd'):
+ formats.append({
+ 'format_id': 'hd',
+ 'url': media_info['hrefhd'],
+ 'ext': stream_type,
+ })
+ if media_class == 'audio':
+ formats[0]['vcodec'] = 'none'
+ else:
+ formats[0]['format_id'] = 'sd'
+
+ return {
+ 'id': media_info['identifier'].split('|')[1],
+ 'display_id': display_id,
+ 'formats': formats,
+ 'title': media_info['title'],
+ 'description': media_info.get('description'),
+ 'thumbnail': thumbnail,
+ 'uploader_id': user.get('username'),
+ 'uploader': user.get('fullname'),
+ 'timestamp': parse_iso8601(media_info.get('published'), ' '),
+ 'duration': int_or_none(media_info.get('duration')),
+ 'view_count': int_or_none(media_info.get('count_views')),
+ 'like_count': int_or_none(media_info.get('count_likes')),
+ 'comment_count': int_or_none(media_info.get('count_comments')),
+ 'age_limit': age_limit,
+ }
diff --git a/hypervideo_dl/extractor/trovo.py b/hypervideo_dl/extractor/trovo.py
new file mode 100644
index 0000000..de0107a
--- /dev/null
+++ b/hypervideo_dl/extractor/trovo.py
@@ -0,0 +1,194 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+
+class TrovoBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
+
+ def _extract_streamer_info(self, data):
+ streamer_info = data.get('streamerInfo') or {}
+ username = streamer_info.get('userName')
+ return {
+ 'uploader': streamer_info.get('nickName'),
+ 'uploader_id': str_or_none(streamer_info.get('uid')),
+ 'uploader_url': 'https://trovo.live/' + username if username else None,
+ }
+
+
+class TrovoIE(TrovoBaseIE):
+ _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
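+        # Live metadata is served from Trovo's unauthenticated GraphQL endpoint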
+ live_info = self._download_json(
+ 'https://gql.trovo.live/', username, query={
+ 'query': '''{
+ getLiveInfo(params: {userName: "%s"}) {
+ isLive
+ programInfo {
+ coverUrl
+ id
+ streamInfo {
+ desc
+ playUrl
+ }
+ title
+ }
+ streamerInfo {
+ nickName
+ uid
+ userName
+ }
+ }
+}''' % username,
+ })['data']['getLiveInfo']
+ if live_info.get('isLive') == 0:
+ raise ExtractorError('%s is offline' % username, expected=True)
+ program_info = live_info['programInfo']
+ program_id = program_info['id']
+ title = self._live_title(program_info['title'])
+
+ formats = []
+ for stream_info in (program_info.get('streamInfo') or []):
+ play_url = stream_info.get('playUrl')
+ if not play_url:
+ continue
+ format_id = stream_info.get('desc')
+ formats.append({
+ 'format_id': format_id,
+ 'height': int_or_none(format_id[:-1]) if format_id else None,
+ 'url': play_url,
+ })
+ self._sort_formats(formats)
+
+ info = {
+ 'id': program_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': program_info.get('coverUrl'),
+ 'is_live': True,
+ }
+ info.update(self._extract_streamer_info(live_info))
+ return info
+
+
+class TrovoVodIE(TrovoBaseIE):
+ _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
+ 'info_dict': {
+ 'id': 'ltv-100095501_100095501_1609596043',
+ 'ext': 'mp4',
+ 'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
+ 'uploader': 'Exsl',
+ 'timestamp': 1609640305,
+ 'upload_date': '20210103',
+ 'uploader_id': '100095501',
+ 'duration': 43977,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'comments': 'mincount:8',
+ 'categories': ['Grand Theft Auto V'],
+ },
+ }, {
+ 'url': 'https://trovo.live/clip/lc-5285890810184026005',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ vid = self._match_id(url)
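+        # Fetch VOD details and the comment list in a single batched GraphQL request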
+ resp = self._download_json(
+ 'https://gql.trovo.live/', vid, data=json.dumps([{
+ 'query': '''{
+ batchGetVodDetailInfo(params: {vids: ["%s"]}) {
+ VodDetailInfos
+ }
+}''' % vid,
+ }, {
+ 'query': '''{
+ getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
+ commentList {
+ author {
+ nickName
+ uid
+ }
+ commentID
+ content
+ createdAt
+ parentID
+ }
+ }
+}''' % vid,
+ }]).encode(), headers={
+ 'Content-Type': 'application/json',
+ })
+ vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
+ vod_info = vod_detail_info['vodInfo']
+ title = vod_info['title']
+
+ language = vod_info.get('languageName')
+ formats = []
+ for play_info in (vod_info.get('playInfos') or []):
+ play_url = play_info.get('playUrl')
+ if not play_url:
+ continue
+ format_id = play_info.get('desc')
+ formats.append({
+ 'ext': 'mp4',
+ 'filesize': int_or_none(play_info.get('fileSize')),
+ 'format_id': format_id,
+ 'height': int_or_none(format_id[:-1]) if format_id else None,
+ 'language': language,
+ 'protocol': 'm3u8_native',
+ 'tbr': int_or_none(play_info.get('bitrate')),
+ 'url': play_url,
+ 'http_headers': {'Origin': 'https://trovo.live'},
+ })
+ self._sort_formats(formats)
+
+ category = vod_info.get('categoryName')
+ get_count = lambda x: int_or_none(vod_info.get(x + 'Num'))
+
+ comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or []
+ comments = []
+ for comment in comment_list:
+ content = comment.get('content')
+ if not content:
+ continue
+ author = comment.get('author') or {}
+ parent = comment.get('parentID')
+ comments.append({
+ 'author': author.get('nickName'),
+ 'author_id': str_or_none(author.get('uid')),
+ 'id': str_or_none(comment.get('commentID')),
+ 'text': content,
+ 'timestamp': int_or_none(comment.get('createdAt')),
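+                # a parentID of 0 marks a top-level comment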
+ 'parent': 'root' if parent == 0 else str_or_none(parent),
+ })
+
+ info = {
+ 'id': vid,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': vod_info.get('coverUrl'),
+ 'timestamp': int_or_none(vod_info.get('publishTs')),
+ 'duration': int_or_none(vod_info.get('duration')),
+ 'view_count': get_count('watch'),
+ 'like_count': get_count('like'),
+ 'comment_count': get_count('comment'),
+ 'comments': comments,
+ 'categories': [category] if category else None,
+ }
+ info.update(self._extract_streamer_info(vod_detail_info))
+ return info
diff --git a/hypervideo_dl/extractor/trunews.py b/hypervideo_dl/extractor/trunews.py
new file mode 100644
index 0000000..cca5b5c
--- /dev/null
+++ b/hypervideo_dl/extractor/trunews.py
@@ -0,0 +1,34 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TruNewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
+ 'info_dict': {
+ 'id': '5c5a21e65d3c196e1c0020cc',
+ 'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
+ 'ext': 'mp4',
+ 'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?",
+ 'description': 'md5:c583b72147cc92cf21f56a31aff7a670',
+ 'duration': 3685,
+ 'timestamp': 1549411440,
+ 'upload_date': '20190206',
+ },
+ 'add_ie': ['Zype'],
+ }
+ _ZYPE_TEMPL = 'https://player.zype.com/embed/%s.js?api_key=X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
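+        # Resolve the friendly title to a Zype video id through the Zype API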
+ zype_id = self._download_json(
+ 'https://api.zype.com/videos', display_id, query={
+ 'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H',
+ 'per_page': 1,
+ 'active': 'true',
+ 'friendly_title': display_id,
+ })['response'][0]['_id']
+ return self.url_result(self._ZYPE_TEMPL % zype_id, 'Zype', zype_id)
diff --git a/hypervideo_dl/extractor/trutv.py b/hypervideo_dl/extractor/trutv.py
new file mode 100644
index 0000000..ce892c8
--- /dev/null
+++ b/hypervideo_dl/extractor/trutv.py
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .turner import TurnerBaseIE
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class TruTVIE(TurnerBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P<series_slug>[0-9A-Za-z-]+)/(?:videos/(?P<clip_slug>[0-9A-Za-z-]+)|(?P<id>\d+))'
+ _TEST = {
+ 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html',
+ 'info_dict': {
+ 'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1',
+ 'ext': 'mp4',
+ 'title': 'Sunlight-Activated Flower',
+ 'description': "A customer is stunned when he sees Michael's sunlight-activated flower.",
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ series_slug, clip_slug, video_id = re.match(self._VALID_URL, url).groups()
+
+ if video_id:
+ path = 'episode'
+ display_id = video_id
+ else:
+ path = 'series/clip'
+ display_id = clip_slug
+
+ data = self._download_json(
+ 'https://api.trutv.com/v2/web/%s/%s/%s' % (path, series_slug, display_id),
+ display_id)
+ video_data = data['episode'] if video_id else data['info']
+ media_id = video_data['mediaId']
+ title = video_data['title'].strip()
+
+ info = self._extract_ngtv_info(
+ media_id, {}, {
+ 'url': url,
+ 'site_name': 'truTV',
+ 'auth_required': video_data.get('isAuthRequired'),
+ })
+
+ thumbnails = []
+ for image in video_data.get('images', []):
+ image_url = image.get('srcUrl')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ info.update({
+ 'id': media_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(video_data.get('publicationDate')),
+ 'series': video_data.get('showTitle'),
+ 'season_number': int_or_none(video_data.get('seasonNum')),
+ 'episode_number': int_or_none(video_data.get('episodeNum')),
+ })
+ return info
diff --git a/hypervideo_dl/extractor/tube8.py b/hypervideo_dl/extractor/tube8.py
new file mode 100644
index 0000000..db93b01
--- /dev/null
+++ b/hypervideo_dl/extractor/tube8.py
@@ -0,0 +1,86 @@
+from __future__ import unicode_literals
+
+import re
+
+from ..utils import (
+ int_or_none,
+ str_to_int,
+)
+from .keezmovies import KeezMoviesIE
+
+
+class Tube8IE(KeezMoviesIE):
+ _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
+ 'md5': '65e20c48e6abff62ed0c3965fff13a39',
+ 'info_dict': {
+ 'id': '229795',
+ 'display_id': 'kasia-music-video',
+ 'ext': 'mp4',
+ 'description': 'hot teen Kasia grinding',
+ 'uploader': 'unknown',
+ 'title': 'Kasia music video',
+ 'age_limit': 18,
+ 'duration': 230,
+ 'categories': ['Teen'],
+ 'tags': ['dancing'],
+ },
+ }, {
+ 'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)',
+ webpage)
+
+ def _real_extract(self, url):
+ webpage, info = self._extract_info(url)
+
+ if not info['title']:
+ info['title'] = self._html_search_regex(
+ r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
+
+ description = self._html_search_regex(
+ r'(?s)Description:</dt>\s*<dd>(.+?)</dd>', webpage, 'description', fatal=False)
+ uploader = self._html_search_regex(
+ r'<span class="username">\s*(.+?)\s*<',
+ webpage, 'uploader', fatal=False)
+
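+        # Ratings are exposed through the player's rupVar/rdownVar JS variables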
+ like_count = int_or_none(self._search_regex(
+ r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False))
+ dislike_count = int_or_none(self._search_regex(
+ r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
+ view_count = str_to_int(self._search_regex(
+ r'Views:\s*</dt>\s*<dd>([\d,\.]+)',
+ webpage, 'view count', fatal=False))
+ comment_count = str_to_int(self._search_regex(
+ r'<span id="allCommentsCount">(\d+)</span>',
+ webpage, 'comment count', fatal=False))
+
+ category = self._search_regex(
+ r'Category:\s*</dt>\s*<dd>\s*<a[^>]+href=[^>]+>([^<]+)',
+ webpage, 'category', fatal=False)
+ categories = [category] if category else None
+
+ tags_str = self._search_regex(
+ r'(?s)Tags:\s*</dt>\s*<dd>(.+?)</(?!a)',
+ webpage, 'tags', fatal=False)
+        tags = re.findall(
+            r'<a[^>]+href=[^>]+>([^<]+)', tags_str) if tags_str else None
+
+ info.update({
+ 'description': description,
+ 'uploader': uploader,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ 'tags': tags,
+ })
+
+ return info
diff --git a/hypervideo_dl/extractor/tubitv.py b/hypervideo_dl/extractor/tubitv.py
new file mode 100644
index 0000000..ebfb05c
--- /dev/null
+++ b/hypervideo_dl/extractor/tubitv.py
@@ -0,0 +1,110 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ sanitized_Request,
+ urlencode_postdata,
+)
+
+
+class TubiTvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/(?P<id>[0-9]+)'
+ _LOGIN_URL = 'http://tubitv.com/login'
+ _NETRC_MACHINE = 'tubitv'
+ _GEO_COUNTRIES = ['US']
+ _TESTS = [{
+ 'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
+ 'md5': '43ac06be9326f41912dc64ccf7a80320',
+ 'info_dict': {
+ 'id': '283829',
+ 'ext': 'mp4',
+ 'title': 'The Comedian at The Friday',
+ 'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
+ 'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
+ },
+ }, {
+ 'url': 'http://tubitv.com/tv-shows/321886/s01_e01_on_nom_stories',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tubitv.com/movies/383676/tracker',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tubitv.com/movies/560057/penitentiary?start=true',
+ 'info_dict': {
+ 'id': '560057',
+ 'ext': 'mp4',
+ 'title': 'Penitentiary',
+ 'description': 'md5:8d2fc793a93cc1575ff426fdcb8dd3f9',
+ 'uploader_id': 'd8fed30d4f24fcb22ec294421b9defc2',
+ 'release_year': 1979,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+ self.report_login()
+ form_data = {
+ 'username': username,
+ 'password': password,
+ }
+ payload = urlencode_postdata(form_data)
+ request = sanitized_Request(self._LOGIN_URL, payload)
+ request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ login_page = self._download_webpage(
+ request, None, False, 'Wrong login info')
+ if not re.search(r'id="tubi-logout"', login_page):
+ raise ExtractorError(
+ 'Login failed (invalid username/password)', expected=True)
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
+ title = video_data['title']
+
+ formats = self._extract_m3u8_formats(
+ self._proto_relative_url(video_data['url']),
+ video_id, 'mp4', 'm3u8_native')
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for thumbnail_url in video_data.get('thumbnails', []):
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': self._proto_relative_url(thumbnail_url),
+ })
+
+ subtitles = {}
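+        # Subtitle tracks without an explicit language default to English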
+ for sub in video_data.get('subtitles', []):
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ subtitles.setdefault(sub.get('lang', 'English'), []).append({
+ 'url': self._proto_relative_url(sub_url),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'uploader_id': video_data.get('publisher_id'),
+ 'release_year': int_or_none(video_data.get('year')),
+ }
diff --git a/hypervideo_dl/extractor/tudou.py b/hypervideo_dl/extractor/tudou.py
new file mode 100644
index 0000000..7421378
--- /dev/null
+++ b/hypervideo_dl/extractor/tudou.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TudouPlaylistIE(InfoExtractor):
+ IE_NAME = 'tudou:playlist'
+ _VALID_URL = r'https?://(?:www\.)?tudou\.com/listplay/(?P<id>[\w-]{11})\.html'
+ _TESTS = [{
+ 'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo.html',
+ 'info_dict': {
+ 'id': 'zzdE77v6Mmo',
+ },
+ 'playlist_mincount': 209,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ playlist_data = self._download_json(
+ 'http://www.tudou.com/tvp/plist.action?lcode=%s' % playlist_id, playlist_id)
+ entries = [self.url_result(
+ 'http://www.tudou.com/programs/view/%s' % item['icode'],
+ 'Tudou', item['icode'],
+ item['kw']) for item in playlist_data['items']]
+ return self.playlist_result(entries, playlist_id)
+
+
+class TudouAlbumIE(InfoExtractor):
+ IE_NAME = 'tudou:album'
+ _VALID_URL = r'https?://(?:www\.)?tudou\.com/album(?:cover|play)/(?P<id>[\w-]{11})'
+ _TESTS = [{
+ 'url': 'http://www.tudou.com/albumplay/v5qckFJvNJg.html',
+ 'info_dict': {
+ 'id': 'v5qckFJvNJg',
+ },
+ 'playlist_mincount': 45,
+ }]
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+ album_data = self._download_json(
+ 'http://www.tudou.com/tvp/alist.action?acode=%s' % album_id, album_id)
+ entries = [self.url_result(
+ 'http://www.tudou.com/programs/view/%s' % item['icode'],
+ 'Tudou', item['icode'],
+ item['kw']) for item in album_data['items']]
+ return self.playlist_result(entries, album_id)
diff --git a/hypervideo_dl/extractor/tumblr.py b/hypervideo_dl/extractor/tumblr.py
new file mode 100644
index 0000000..ae584ad
--- /dev/null
+++ b/hypervideo_dl/extractor/tumblr.py
@@ -0,0 +1,213 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ urlencode_postdata
+)
+
+
+class TumblrIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
+ _NETRC_MACHINE = 'tumblr'
+ _LOGIN_URL = 'https://www.tumblr.com/login'
+ _TESTS = [{
+ 'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
+ 'md5': '479bb068e5b16462f5176a6828829767',
+ 'info_dict': {
+ 'id': '54196191430',
+ 'ext': 'mp4',
+ 'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...',
+ 'description': 'md5:37db8211e40b50c7c44e95da14f630b7',
+ 'thumbnail': r're:http://.*\.jpg',
+ }
+ }, {
+ 'url': 'http://5sostrum.tumblr.com/post/90208453769/yall-forgetting-the-greatest-keek-of-them-all',
+ 'md5': 'bf348ef8c0ef84fbf1cbd6fa6e000359',
+ 'info_dict': {
+ 'id': '90208453769',
+ 'ext': 'mp4',
+ 'title': '5SOS STRUM ;]',
+ 'description': 'md5:dba62ac8639482759c8eb10ce474586a',
+ 'thumbnail': r're:http://.*\.jpg',
+ }
+ }, {
+ 'url': 'http://hdvideotest.tumblr.com/post/130323439814/test-description-for-my-hd-video',
+ 'md5': '7ae503065ad150122dc3089f8cf1546c',
+ 'info_dict': {
+ 'id': '130323439814',
+ 'ext': 'mp4',
+ 'title': 'HD Video Testing \u2014 Test description for my HD video',
+ 'description': 'md5:97cc3ab5fcd27ee4af6356701541319c',
+ 'thumbnail': r're:http://.*\.jpg',
+ },
+ 'params': {
+ 'format': 'hd',
+ },
+ }, {
+ 'url': 'http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching',
+ 'md5': 'de07e5211d60d4f3a2c3df757ea9f6ab',
+ 'info_dict': {
+ 'id': 'Wmur',
+ 'ext': 'mp4',
+ 'title': 'naked smoking & stretching',
+ 'upload_date': '20150506',
+ 'timestamp': 1430931613,
+ 'age_limit': 18,
+ 'uploader_id': '1638622',
+ 'uploader': 'naked-yogi',
+ },
+ 'add_ie': ['Vidme'],
+ }, {
+ 'url': 'http://camdamage.tumblr.com/post/98846056295/',
+ 'md5': 'a9e0c8371ea1ca306d6554e3fecf50b6',
+ 'info_dict': {
+ 'id': '105463834',
+ 'ext': 'mp4',
+ 'title': 'Cam Damage-HD 720p',
+ 'uploader': 'John Moyer',
+ 'uploader_id': 'user32021558',
+ },
+ 'add_ie': ['Vimeo'],
+ }, {
+ 'url': 'http://sutiblr.tumblr.com/post/139638707273',
+ 'md5': '2dd184b3669e049ba40563a7d423f95c',
+ 'info_dict': {
+ 'id': 'ir7qBEIKqvq',
+ 'ext': 'mp4',
+ 'title': 'Vine by sutiblr',
+ 'alt_title': 'Vine by sutiblr',
+ 'uploader': 'sutiblr',
+ 'uploader_id': '1198993975374495744',
+ 'upload_date': '20160220',
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ 'add_ie': ['Vine'],
+ }, {
+ 'url': 'http://vitasidorkina.tumblr.com/post/134652425014/joskriver-victoriassecret-invisibility-or',
+ 'md5': '01c12ceb82cbf6b2fe0703aa56b3ad72',
+ 'info_dict': {
+ 'id': '-7LnUPGlSo',
+ 'ext': 'mp4',
+ 'title': 'Video by victoriassecret',
+ 'description': 'Invisibility or flight…which superpower would YOU choose? #VSFashionShow #ThisOrThat',
+ 'uploader_id': 'victoriassecret',
+ 'thumbnail': r're:^https?://.*\.jpg'
+ },
+ 'add_ie': ['Instagram'],
+ }]
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+ login_form.update({
+ 'user[email]': username,
+ 'user[password]': password
+ })
+
+ response, urlh = self._download_webpage_handle(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': self._LOGIN_URL,
+ })
+
+ # Successful login
+ if '/dashboard' in urlh.geturl():
+ return
+
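+ # On failure Tumblr re-renders the form with an inline
+ # RegistrationForm.errors array carrying the reason.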
+ login_errors = self._parse_json(
+ self._search_regex(
+ r'RegistrationForm\.errors\s*=\s*(\[.+?\])\s*;', response,
+ 'login errors', default='[]'),
+ None, fatal=False)
+ if login_errors:
+ raise ExtractorError(
+ 'Unable to login: %s' % login_errors[0], expected=True)
+
+ self.report_warning('Login has probably failed')
+
+ def _real_extract(self, url):
+ m_url = re.match(self._VALID_URL, url)
+ video_id = m_url.group('id')
+ blog = m_url.group('blog_name')
+
+ url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ redirect_url = urlh.geturl()
+ if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
+ raise ExtractorError(
+ 'This Tumblr may contain sensitive media. '
+ 'Disable safe mode in your account settings '
+ 'at https://www.tumblr.com/settings/account#safe_mode',
+ expected=True)
+
+ iframe_url = self._search_regex(
+ r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
+ webpage, 'iframe url', default=None)
+ if iframe_url is None:
+ return self.url_result(redirect_url, 'Generic')
+
+ iframe = self._download_webpage(iframe_url, video_id, 'Downloading iframe page')
+
+ duration = None
+ sources = []
+
+ sd_url = self._search_regex(
+ r'<source[^>]+src=(["\'])(?P<url>.+?)\1', iframe,
+ 'sd video url', default=None, group='url')
+ if sd_url:
+ sources.append((sd_url, 'sd'))
+
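+ # The HD rendition (and the duration) is embedded as JSON in the
+ # iframe's data-crt-options attribute.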
+ options = self._parse_json(
+ self._search_regex(
+ r'data-crt-options=(["\'])(?P<options>.+?)\1', iframe,
+ 'hd video url', default='', group='options'),
+ video_id, fatal=False)
+ if options:
+ duration = int_or_none(options.get('duration'))
+ hd_url = options.get('hdUrl')
+ if hd_url:
+ sources.append((hd_url, 'hd'))
+
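+ # sources is ordered sd -> hd, so the enumeration index doubles as a
+ # relative quality rank.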
+ formats = [{
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'format_id': format_id,
+ 'height': int_or_none(self._search_regex(
+ r'/(\d{3,4})$', video_url, 'height', default=None)),
+ 'quality': quality,
+ } for quality, (video_url, format_id) in enumerate(sources)]
+
+ self._sort_formats(formats)
+
+ # The <title> tag is the only place a title is always present; it may
+ # be truncated, but the alternatives are missing for many videos
+ video_title = self._html_search_regex(
+ r'(?s)<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
+ webpage, 'title')
+
+ return {
+ 'id': video_id,
+ 'title': video_title,
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/tunein.py b/hypervideo_dl/extractor/tunein.py
new file mode 100644
index 0000000..c7a5f5a
--- /dev/null
+++ b/hypervideo_dl/extractor/tunein.py
@@ -0,0 +1,183 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+from ..compat import compat_urlparse
+
+
+class TuneInBaseIE(InfoExtractor):
+ _API_BASE_URL = 'http://tunein.com/tuner/tune/'
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+src=["\'](?P<url>(?:https?://)?tunein\.com/embed/player/[pst]\d+)',
+ webpage)
+
+ def _real_extract(self, url):
+ content_id = self._match_id(url)
+
+ content_info = self._download_json(
+ self._API_BASE_URL + self._API_URL_QUERY % content_id,
+ content_id, note='Downloading JSON metadata')
+
+ title = content_info['Title']
+ thumbnail = content_info.get('Logo')
+ location = content_info.get('Location')
+ streams_url = content_info.get('StreamUrl')
+ if not streams_url:
+ raise ExtractorError('No downloadable streams found', expected=True)
+ if not streams_url.startswith('http://'):
+ streams_url = compat_urlparse.urljoin(url, streams_url)
+
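+ # The stream endpoint replies with JSONP; the transform strips the
+ # wrapper so it parses as JSON, e.g.
+ # '({"Streams": [...]});' -> '{"Streams": [...]}'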
+ streams = self._download_json(
+ streams_url, content_id, note='Downloading stream data',
+ transform_source=lambda s: re.sub(r'^\s*\((.*)\);\s*$', r'\1', s))['Streams']
+
+ is_live = None
+ formats = []
+ for stream in streams:
+ if stream.get('Type') == 'Live':
+ is_live = True
+ reliability = stream.get('Reliability')
+ format_note = (
+ 'Reliability: %d%%' % reliability
+ if reliability is not None else None)
+ formats.append({
+ 'preference': (
+ 0 if reliability is None or reliability > 90
+ else 1),
+ 'abr': stream.get('Bandwidth'),
+ 'ext': stream.get('MediaType').lower(),
+ 'acodec': stream.get('MediaType'),
+ 'vcodec': 'none',
+ 'url': stream.get('Url'),
+ 'source_preference': reliability,
+ 'format_note': format_note,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': content_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'location': location,
+ 'is_live': is_live,
+ }
+
+
+class TuneInClipIE(TuneInBaseIE):
+ IE_NAME = 'tunein:clip'
+ _VALID_URL = r'https?://(?:www\.)?tunein\.com/station/.*?audioClipId\=(?P<id>\d+)'
+ _API_URL_QUERY = '?tuneType=AudioClip&audioclipId=%s'
+
+ _TESTS = [{
+ 'url': 'http://tunein.com/station/?stationId=246119&audioClipId=816',
+ 'md5': '99f00d772db70efc804385c6b47f4e77',
+ 'info_dict': {
+ 'id': '816',
+ 'title': '32m',
+ 'ext': 'mp3',
+ },
+ }]
+
+
+class TuneInStationIE(TuneInBaseIE):
+ IE_NAME = 'tunein:station'
+ _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-s|station/.*?StationId=|embed/player/s)(?P<id>\d+)'
+ _API_URL_QUERY = '?tuneType=Station&stationId=%s'
+
+ @classmethod
+ def suitable(cls, url):
+ return False if TuneInClipIE.suitable(url) else super(TuneInStationIE, cls).suitable(url)
+
+ _TESTS = [{
+ 'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
+ 'info_dict': {
+ 'id': '34682',
+ 'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
+ 'ext': 'mp3',
+ 'location': 'Tacoma, WA',
+ },
+ 'params': {
+ 'skip_download': True, # live stream
+ },
+ }, {
+ 'url': 'http://tunein.com/embed/player/s6404/',
+ 'only_matching': True,
+ }]
+
+
+class TuneInProgramIE(TuneInBaseIE):
+ IE_NAME = 'tunein:program'
+ _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-p|program/.*?ProgramId=|embed/player/p)(?P<id>\d+)'
+ _API_URL_QUERY = '?tuneType=Program&programId=%s'
+
+ _TESTS = [{
+ 'url': 'http://tunein.com/radio/Jazz-24-p2506/',
+ 'info_dict': {
+ 'id': '2506',
+ 'title': 'Jazz 24 on 91.3 WUKY-HD3',
+ 'ext': 'mp3',
+ 'location': 'Lexington, KY',
+ },
+ 'params': {
+ 'skip_download': True, # live stream
+ },
+ }, {
+ 'url': 'http://tunein.com/embed/player/p191660/',
+ 'only_matching': True,
+ }]
+
+
+class TuneInTopicIE(TuneInBaseIE):
+ IE_NAME = 'tunein:topic'
+ _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:topic/.*?TopicId=|embed/player/t)(?P<id>\d+)'
+ _API_URL_QUERY = '?tuneType=Topic&topicId=%s'
+
+ _TESTS = [{
+ 'url': 'http://tunein.com/topic/?TopicId=101830576',
+ 'md5': 'c31a39e6f988d188252eae7af0ef09c9',
+ 'info_dict': {
+ 'id': '101830576',
+ 'title': 'Votez pour moi du 29 octobre 2015 (29/10/15)',
+ 'ext': 'mp3',
+ 'location': 'Belgium',
+ },
+ }, {
+ 'url': 'http://tunein.com/embed/player/t101830576/',
+ 'only_matching': True,
+ }]
+
+
+class TuneInShortenerIE(InfoExtractor):
+ IE_NAME = 'tunein:shortener'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://tun\.in/(?P<id>[A-Za-z0-9]+)'
+
+ _TEST = {
+ # test redirection
+ 'url': 'http://tun.in/ser7s',
+ 'info_dict': {
+ 'id': '34682',
+ 'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
+ 'ext': 'mp3',
+ 'location': 'Tacoma, WA',
+ },
+ 'params': {
+ 'skip_download': True, # live stream
+ },
+ }
+
+ def _real_extract(self, url):
+ redirect_id = self._match_id(url)
+ # The server doesn't support HEAD requests
+ urlh = self._request_webpage(
+ url, redirect_id, note='Downloading redirect page')
+ url = urlh.geturl()
+ self.to_screen('Following redirect: %s' % url)
+ return self.url_result(url)
diff --git a/hypervideo_dl/extractor/tunepk.py b/hypervideo_dl/extractor/tunepk.py
new file mode 100644
index 0000000..9d42651
--- /dev/null
+++ b/hypervideo_dl/extractor/tunepk.py
@@ -0,0 +1,90 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class TunePkIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?tune\.pk/(?:video/|player/embed_player\.php\?.*?\bvid=)|
+ embed\.tune\.pk/play/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://tune.pk/video/6919541/maudie-2017-international-trailer-1-ft-ethan-hawke-sally-hawkins',
+ 'md5': '0c537163b7f6f97da3c5dd1e3ef6dd55',
+ 'info_dict': {
+ 'id': '6919541',
+ 'ext': 'mp4',
+ 'title': 'Maudie (2017) | International Trailer # 1 ft Ethan Hawke, Sally Hawkins',
+ 'description': 'md5:eb5a04114fafef5cec90799a93a2d09c',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1487327564,
+ 'upload_date': '20170217',
+ 'uploader': 'Movie Trailers',
+ 'duration': 107,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://tune.pk/player/embed_player.php?vid=6919541&folder=2017/02/17/&width=600&height=350&autoplay=no',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.tune.pk/play/6919541?autoplay=no&ssl=yes&inline=true',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://tune.pk/video/%s' % video_id, video_id)
+
+ details = self._parse_json(
+ self._search_regex(
+ r'new\s+TunePlayer\(({.+?})\)\s*;\s*\n', webpage, 'tune player'),
+ video_id)['details']
+
+ video = details['video']
+ title = video.get('title') or self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'title', webpage, 'title', fatal=True)
+
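+ # The page boots a JWPlayer; its sources array maps directly onto the
+ # generic JWPlayer format parser.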
+ formats = self._parse_jwplayer_formats(
+ details['player']['sources'], video_id)
+ self._sort_formats(formats)
+
+ description = self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'description', webpage, 'description')
+
+ thumbnail = video.get('thumb') or self._og_search_thumbnail(
+ webpage, default=None) or self._html_search_meta(
+ 'thumbnail', webpage, 'thumbnail')
+
+ timestamp = unified_timestamp(video.get('date_added'))
+ uploader = try_get(
+ video, lambda x: x['uploader']['name'],
+ compat_str) or self._html_search_meta('author', webpage, 'author')
+
+ duration = int_or_none(video.get('duration'))
+ view_count = int_or_none(video.get('views'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/turbo.py b/hypervideo_dl/extractor/turbo.py
new file mode 100644
index 0000000..be3eaa5
--- /dev/null
+++ b/hypervideo_dl/extractor/turbo.py
@@ -0,0 +1,68 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ qualities,
+ xpath_text,
+)
+
+
+class TurboIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?turbo\.fr/videos-voiture/(?P<id>[0-9]+)-'
+ _API_URL = 'http://www.turbo.fr/api/tv/xml.php?player_generique=player_generique&id={0:}'
+ _TEST = {
+ 'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
+ 'md5': '33f4b91099b36b5d5a91f84b5bcba600',
+ 'info_dict': {
+ 'id': '454443',
+ 'ext': 'mp4',
+ 'duration': 3715,
+ 'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
+ 'description': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ playlist = self._download_xml(self._API_URL.format(video_id), video_id)
+ item = playlist.find('./channel/item')
+ if item is None:
+ raise ExtractorError('Playlist item was not found', expected=True)
+
+ title = xpath_text(item, './title', 'title')
+ duration = int_or_none(xpath_text(item, './durate', 'duration'))
+ thumbnail = xpath_text(item, './visuel_clip', 'thumbnail')
+ description = self._html_search_meta('description', webpage)
+
+ formats = []
+ get_quality = qualities(['3g', 'sd', 'hq'])
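+ # qualities() returns a ranking function over the given list,
+ # ascending: '3g' < 'sd' < 'hq'.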
+ for child in item:
+ m = re.search(r'url_video_(?P<quality>.+)', child.tag)
+ if m:
+ quality = compat_str(m.group('quality'))
+ formats.append({
+ 'format_id': quality,
+ 'url': child.text,
+ 'quality': get_quality(quality),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/turner.py b/hypervideo_dl/extractor/turner.py
new file mode 100644
index 0000000..81229a5
--- /dev/null
+++ b/hypervideo_dl/extractor/turner.py
@@ -0,0 +1,260 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .adobepass import AdobePassIE
+from ..compat import compat_str
+from ..utils import (
+ fix_xml_ampersands,
+ xpath_text,
+ int_or_none,
+ determine_ext,
+ float_or_none,
+ parse_duration,
+ xpath_attr,
+ update_url_query,
+ ExtractorError,
+ strip_or_none,
+ url_or_none,
+)
+
+
+class TurnerBaseIE(AdobePassIE):
+ _AKAMAI_SPE_TOKEN_CACHE = {}
+
+ def _extract_timestamp(self, video_data):
+ return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
+
+ def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, custom_tokenizer_query=None):
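+ # Akamai SPE tokens are scoped to a secure path and cached per path so
+ # sibling renditions reuse one tokenizer round trip; the token is
+ # appended to the media URL as an "hdnea" query parameter.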
+ secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
+ token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
+ if not token:
+ query = {
+ 'path': secure_path,
+ }
+ if custom_tokenizer_query:
+ query.update(custom_tokenizer_query)
+ else:
+ query['videoId'] = content_id
+ if ap_data.get('auth_required'):
+ query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
+ auth = self._download_xml(
+ tokenizer_src, content_id, query=query)
+ error_msg = xpath_text(auth, 'error/msg')
+ if error_msg:
+ raise ExtractorError(error_msg, expected=True)
+ token = xpath_text(auth, 'token')
+ if not token:
+ return video_url
+ self._AKAMAI_SPE_TOKEN_CACHE[secure_path] = token
+ return video_url + '?hdnea=' + token
+
+ def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}, fatal=False):
+ video_data = self._download_xml(
+ data_src, video_id,
+ transform_source=lambda s: fix_xml_ampersands(s).strip(),
+ fatal=fatal)
+ if not video_data:
+ return {}
+ video_id = video_data.attrib['id']
+ title = xpath_text(video_data, 'headline', fatal=True)
+ content_id = xpath_text(video_data, 'contentId') or video_id
+ # rtmp_src = xpath_text(video_data, 'akamai/src')
+ # if rtmp_src:
+ # split_rtmp_src = rtmp_src.split(',')
+ # if len(split_rtmp_src) == 2:
+ # rtmp_src = split_rtmp_src[1]
+ # aifp = xpath_text(video_data, 'akamai/aifp', default='')
+
+ urls = []
+ formats = []
+ thumbnails = []
+ subtitles = {}
+ rex = re.compile(
+ r'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?')
+ # Possible formats locations: files/file, files/groupFiles/files
+ # and maybe others
+ for video_file in video_data.findall('.//file'):
+ video_url = url_or_none(video_file.text.strip())
+ if not video_url:
+ continue
+ ext = determine_ext(video_url)
+ if video_url.startswith('/mp4:protected/'):
+ continue
+ # TODO Correct extraction for these files
+ # protected_path_data = path_data.get('protected')
+ # if not protected_path_data or not rtmp_src:
+ # continue
+ # protected_path = self._search_regex(
+ # r'/mp4:(.+)\.[a-z0-9]', video_url, 'secure path')
+ # auth = self._download_webpage(
+ # protected_path_data['tokenizer_src'], query={
+ # 'path': protected_path,
+ # 'videoId': content_id,
+ # 'aifp': aifp,
+ # })
+ # token = xpath_text(auth, 'token')
+ # if not token:
+ # continue
+ # video_url = rtmp_src + video_url + '?' + token
+ elif video_url.startswith('/secure/'):
+ secure_path_data = path_data.get('secure')
+ if not secure_path_data:
+ continue
+ video_url = self._add_akamai_spe_token(
+ secure_path_data['tokenizer_src'],
+ secure_path_data['media_src'] + video_url,
+ content_id, ap_data)
+ elif not re.match('https?://', video_url):
+ base_path_data = path_data.get(ext, path_data.get('default', {}))
+ media_src = base_path_data.get('media_src')
+ if not media_src:
+ continue
+ video_url = media_src + video_url
+ if video_url in urls:
+ continue
+ urls.append(video_url)
+ format_id = video_file.get('bitrate')
+ if ext in ('scc', 'srt', 'vtt'):
+ subtitles.setdefault('en', []).append({
+ 'ext': ext,
+ 'url': video_url,
+ })
+ elif ext == 'png':
+ thumbnails.append({
+ 'id': format_id,
+ 'url': video_url,
+ })
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ video_url, video_id, fatal=False))
+ elif re.match(r'https?://[^/]+\.akamaihd\.net/[iz]/', video_url):
+ formats.extend(self._extract_akamai_formats(
+ video_url, video_id, {
+ 'hds': path_data.get('f4m', {}).get('host'),
+ # nba.cdn.turner.com, ht.cdn.turner.com, ht2.cdn.turner.com
+ # ht3.cdn.turner.com, i.cdn.turner.com, s.cdn.turner.com
+ # ssl.cdn.turner.com
+ 'http': 'pmd.cdn.turner.com',
+ }))
+ elif ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ video_url, video_id, 'mp4',
+ m3u8_id=format_id or 'hls', fatal=False)
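+ # Tokenized secure HLS streams do not support seeking, so flag
+ # them for the downloader.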
+ if '/secure/' in video_url and '?hdnea=' in video_url:
+ for f in m3u8_formats:
+ f['_seekable'] = False
+ formats.extend(m3u8_formats)
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(video_url, {'hdcore': '3.7.0'}),
+ video_id, f4m_id=format_id or 'hds', fatal=False))
+ else:
+ f = {
+ 'format_id': format_id,
+ 'url': video_url,
+ 'ext': ext,
+ }
+ mobj = rex.search(video_url)
+ if mobj:
+ f.update({
+ 'width': int(mobj.group('width')),
+ 'height': int(mobj.group('height')),
+ 'tbr': int_or_none(mobj.group('bitrate')),
+ })
+ elif isinstance(format_id, compat_str):
+ if format_id.isdigit():
+ f['tbr'] = int(format_id)
+ else:
+ mobj = re.match(r'ios_(audio|[0-9]+)$', format_id)
+ if mobj:
+ if mobj.group(1) == 'audio':
+ f.update({
+ 'vcodec': 'none',
+ 'ext': 'm4a',
+ })
+ else:
+ f['tbr'] = int(mobj.group(1))
+ formats.append(f)
+ self._sort_formats(formats)
+
+ for source in video_data.findall('closedCaptions/source'):
+ for track in source.findall('track'):
+ track_url = url_or_none(track.get('url'))
+ if not track_url or track_url.endswith('/big'):
+ continue
+ lang = track.get('lang') or track.get('label') or 'en'
+ subtitles.setdefault(lang, []).append({
+ 'url': track_url,
+ 'ext': {
+ 'scc': 'scc',
+ 'webvtt': 'vtt',
+ 'smptett': 'tt',
+ }.get(source.get('format'))
+ })
+
+ thumbnails.extend({
+ 'id': image.get('cut') or image.get('name'),
+ 'url': image.text,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ } for image in video_data.findall('images/image'))
+
+ is_live = xpath_text(video_data, 'isLive') == 'true'
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'thumbnail': xpath_text(video_data, 'poster'),
+ 'description': strip_or_none(xpath_text(video_data, 'description')),
+ 'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')),
+ 'timestamp': self._extract_timestamp(video_data),
+ 'upload_date': xpath_attr(video_data, 'metas', 'version'),
+ 'series': xpath_text(video_data, 'showTitle'),
+ 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
+ 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
+ 'is_live': is_live,
+ }
+
+ def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
+ streams_data = self._download_json(
+ 'http://medium.ngtv.io/media/%s/tv' % media_id,
+ media_id)['media']['tv']
+ duration = None
+ chapters = []
+ formats = []
+ for supported_type in ('unprotected', 'bulkaes'):
+ stream_data = streams_data.get(supported_type, {})
+ m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
+ if not m3u8_url:
+ continue
+ if stream_data.get('playlistProtection') == 'spe':
+ m3u8_url = self._add_akamai_spe_token(
+ 'http://token.ngtv.io/token/token_spe',
+ m3u8_url, media_id, ap_data or {}, tokenizer_query)
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
+
+ duration = float_or_none(stream_data.get('totalRuntime'))
+
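+ # Chapter boundaries come from contentSegments; keep those of the
+ # first stream variant that provides any.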
+ if not chapters:
+ for chapter in stream_data.get('contentSegments', []):
+ start_time = float_or_none(chapter.get('start'))
+ chapter_duration = float_or_none(chapter.get('duration'))
+ if start_time is None or chapter_duration is None:
+ continue
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': start_time + chapter_duration,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'formats': formats,
+ 'chapters': chapters,
+ 'duration': duration,
+ }
diff --git a/hypervideo_dl/extractor/tv2.py b/hypervideo_dl/extractor/tv2.py
new file mode 100644
index 0000000..334b7d5
--- /dev/null
+++ b/hypervideo_dl/extractor/tv2.py
@@ -0,0 +1,248 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ js_to_json,
+ parse_iso8601,
+ remove_end,
+ strip_or_none,
+ try_get,
+)
+
+
+class TV2IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.tv2.no/v/916509/',
+ 'info_dict': {
+ 'id': '916509',
+ 'ext': 'flv',
+ 'title': 'Se Frode Gryttens hyllest av Steven Gerrard',
+ 'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
+ 'timestamp': 1431715610,
+ 'upload_date': '20150515',
+ 'duration': 156.967,
+ 'view_count': int,
+ 'categories': list,
+ },
+ }]
+ _API_DOMAIN = 'sumo.tv2.no'
+ _PROTOCOLS = ('HDS', 'HLS', 'DASH')
+ _GEO_COUNTRIES = ['NO']
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
+
+ asset = self._download_json(
+ api_base + '.json', video_id,
+ 'Downloading metadata JSON')['asset']
+ title = asset.get('subtitle') or asset['title']
+ is_live = asset.get('live') is True
+
+ formats = []
+ format_urls = []
+ for protocol in self._PROTOCOLS:
+ try:
+ data = self._download_json(
+ api_base + '/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % protocol,
+ video_id, 'Downloading play JSON')['playback']
+ except ExtractorError as e:
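+ # A 401 response carries a structured JSON error; map the known
+ # codes to geo-restriction or login errors before re-raising.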
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read().decode(), video_id)['error']
+ error_code = error.get('code')
+ if error_code == 'ASSET_PLAYBACK_INVALID_GEO_LOCATION':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ elif error_code == 'SESSION_NOT_AUTHENTICATED':
+ self.raise_login_required()
+ raise ExtractorError(error['description'])
+ raise
+ items = try_get(data, lambda x: x['items']['item'])
+ if not items:
+ continue
+ if not isinstance(items, list):
+ items = [items]
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ video_url = item.get('url')
+ if not video_url or video_url in format_urls:
+ continue
+ format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat'))
+ if not self._is_valid_url(video_url, video_id, format_id):
+ continue
+ format_urls.append(video_url)
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ video_url, video_id, f4m_id=format_id, fatal=False))
+ elif ext == 'm3u8':
+ if not data.get('drmProtected'):
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4',
+ 'm3u8' if is_live else 'm3u8_native',
+ m3u8_id=format_id, fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, format_id, fatal=False))
+ elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
+ pass
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'tbr': int_or_none(item.get('bitrate')),
+ 'filesize': int_or_none(item.get('fileSize')),
+ })
+ if not formats and data.get('drmProtected'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+ self._sort_formats(formats)
+
+ thumbnails = [{
+ 'id': thumbnail.get('@type'),
+ 'url': thumbnail.get('url'),
+ } for _, thumbnail in (asset.get('imageVersions') or {}).items()]
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': strip_or_none(asset.get('description')),
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(asset.get('createTime')),
+ 'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')),
+ 'view_count': int_or_none(asset.get('views')),
+ 'categories': asset.get('keywords', '').split(','),
+ 'formats': formats,
+ 'is_live': is_live,
+ }
+
+
+class TV2ArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(?:/[^/]+)+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542',
+ 'info_dict': {
+ 'id': '6930542',
+ 'title': 'Russen hetses etter pingvintyveri - innrømmer å ha åpnet luken på buret',
+ 'description': 'De fire siktede nekter fortsatt for å ha stjålet pingvinbabyene, men innrømmer å ha åpnet luken til de små kyllingene.',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.tv2.no/a/6930542',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ # Old embed pattern (looks unused nowadays)
+ assets = re.findall(r'data-assetid=["\'](\d+)', webpage)
+
+ if not assets:
+ # New embed pattern
+ for v in re.findall(r'(?s)TV2ContentboxVideo\(({.+?})\)', webpage):
+ video = self._parse_json(
+ v, playlist_id, transform_source=js_to_json, fatal=False)
+ if not video:
+ continue
+ asset = video.get('assetId')
+ if asset:
+ assets.append(asset)
+
+ entries = [
+ self.url_result('http://www.tv2.no/v/%s' % asset_id, 'TV2')
+ for asset_id in assets]
+
+ title = remove_end(self._og_search_title(webpage), ' - TV2.no')
+ description = remove_end(self._og_search_description(webpage), ' - TV2.no')
+
+ return self.playlist_result(entries, playlist_id, title, description)
+
+
+class KatsomoIE(TV2IE):
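+ # Katsomo/MTV Finland runs the same playback API as TV2 Norway, so the
+ # whole extraction is inherited; only the API domain, protocols and
+ # geo check differ.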
+ _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv(?:uutiset)?)\.fi/(?:sarja/[0-9a-z-]+-\d+/[0-9a-z-]+-|(?:#!/)?jakso/(?:\d+/[^/]+/)?|video/prog)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
+ 'info_dict': {
+ 'id': '1181321',
+ 'ext': 'mp4',
+ 'title': 'Lahden Pelicans teki kovan ratkaisun – Ville Nieminen pihalle',
+ 'description': 'Päätöksen teki Pelicansin hallitus.',
+ 'timestamp': 1575116484,
+ 'upload_date': '20191130',
+ 'duration': 37.12,
+ 'view_count': int,
+ 'categories': list,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.katsomo.fi/#!/jakso/33001005/studio55-fi/658521/jukka-kuoppamaki-tekee-yha-lauluja-vaikka-lentokoneessa',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mtvuutiset.fi/video/prog1311159',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.katsomo.fi/#!/jakso/1311159',
+ 'only_matching': True,
+ }]
+ _API_DOMAIN = 'api.katsomo.fi'
+ _PROTOCOLS = ('HLS', 'MPD')
+ _GEO_COUNTRIES = ['FI']
+
+
+class MTVUutisetArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mtvuutiset\.fi/artikkeli/[^/]+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.mtvuutiset.fi/artikkeli/tallaisia-vaurioita-viking-amorellassa-on-useamman-osaston-alla-vetta/7931384',
+ 'info_dict': {
+ 'id': '1311159',
+ 'ext': 'mp4',
+ 'title': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
+ 'description': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
+ 'timestamp': 1600608966,
+ 'upload_date': '20200920',
+ 'duration': 153.7886666,
+ 'view_count': int,
+ 'categories': list,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # multiple Youtube embeds
+ 'url': 'https://www.mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ article = self._download_json(
+ 'http://api.mtvuutiset.fi/mtvuutiset/api/json/' + article_id,
+ article_id)
+
+ def entries():
+ for video in (article.get('videos') or []):
+ video_type = video.get('videotype')
+ video_url = video.get('url')
+ if not (video_url and video_type in ('katsomo', 'youtube')):
+ continue
+ yield self.url_result(
+ video_url, video_type.capitalize(), video.get('video_id'))
+
+ return self.playlist_result(
+ entries(), article_id, article.get('title'), article.get('description'))
diff --git a/hypervideo_dl/extractor/tv2dk.py b/hypervideo_dl/extractor/tv2dk.py
new file mode 100644
index 0000000..8bd5fd6
--- /dev/null
+++ b/hypervideo_dl/extractor/tv2dk.py
@@ -0,0 +1,165 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ js_to_json,
+ url_or_none,
+)
+
+
+class TV2DKIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ tvsyd|
+ tv2ostjylland|
+ tvmidtvest|
+ tv2fyn|
+ tv2east|
+ tv2lorry|
+ tv2nord
+ )\.dk/
+ (?:[^/]+/)*
+ (?P<id>[^/?\#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.tvsyd.dk/nyheder/28-10-2019/1930/1930-28-okt-2019?autoplay=1#player',
+ 'info_dict': {
+ 'id': '0_52jmwa0p',
+ 'ext': 'mp4',
+ 'title': '19:30 - 28. okt. 2019',
+ 'timestamp': 1572290248,
+ 'upload_date': '20191028',
+ 'uploader_id': 'tvsyd',
+ 'duration': 1347,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Kaltura'],
+ }, {
+ 'url': 'https://www.tv2ostjylland.dk/artikel/minister-gaar-ind-i-sag-om-diabetes-teknologi',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2ostjylland.dk/nyheder/28-10-2019/22/2200-nyhederne-mandag-d-28-oktober-2019?autoplay=1#player',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvmidtvest.dk/nyheder/27-10-2019/1930/1930-27-okt-2019',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2fyn.dk/artikel/fyn-kan-faa-landets-foerste-fabrik-til-groent-jetbraendstof',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2east.dk/artikel/gods-faar-indleveret-tonsvis-af-aebler-100-kilo-aebler-gaar-til-en-aeblebrandy',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2lorry.dk/koebenhavn/rasmus-paludan-evakueret-til-egen-demonstration#player',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2nord.dk/artikel/dybt-uacceptabelt',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ entries = []
+
+ def add_entry(partner_id, kaltura_id):
+ entries.append(self.url_result(
+ 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
+ video_id=kaltura_id))
+
+ for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
+ video = extract_attributes(video_el)
+ kaltura_id = video.get('data-entryid')
+ if not kaltura_id:
+ continue
+ partner_id = video.get('data-partnerid')
+ if not partner_id:
+ continue
+ add_entry(partner_id, kaltura_id)
+ if not entries:
+ kaltura_id = self._search_regex(
+ r'entry_id\s*:\s*["\']([0-9a-z_]+)', webpage, 'kaltura id')
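+ # \u002F is the JSON-escaped "/", so this matches the Kaltura partner
+ # id both inside embedded JSON and in plain /p/<id>/ paths.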
+ partner_id = self._search_regex(
+ (r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
+ 'partner id')
+ add_entry(partner_id, kaltura_id)
+ return self.playlist_result(entries)
+
+
+class TV2DKBornholmPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://play\.tv2bornholm\.dk/\?.*?\bid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://play.tv2bornholm.dk/?area=specifikTV&id=781021',
+ 'info_dict': {
+ 'id': '781021',
+ 'ext': 'mp4',
+ 'title': '12Nyheder-27.11.19',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id,
+ data=json.dumps({
+ 'playlist_id': video_id,
+ 'serienavn': '',
+ }).encode(), headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Content-Type': 'application/json; charset=UTF-8',
+ })['d']
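+ # AJAX.aspx is an ASP.NET web method; its JSON payload arrives wrapped
+ # in a top-level "d" key.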
+
+ # TODO: generalize flowplayer
+ title = self._search_regex(
+ r'title\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', video, 'title',
+ group='value')
+ sources = self._parse_json(self._search_regex(
+ r'(?s)sources:\s*(\[.+?\]),', video, 'sources'),
+ video_id, js_to_json)
+
+ formats = []
+ srcs = set()
+ for source in sources:
+ src = url_or_none(source.get('src'))
+ if not src:
+ continue
+ if src in srcs:
+ continue
+ srcs.add(src)
+ ext = determine_ext(src)
+ src_type = source.get('type')
+ if src_type == 'application/x-mpegurl' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif src_type == 'application/dash+xml' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/tv2hu.py b/hypervideo_dl/extractor/tv2hu.py
new file mode 100644
index 0000000..86017b7
--- /dev/null
+++ b/hypervideo_dl/extractor/tv2hu.py
@@ -0,0 +1,62 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class TV2HuIE(InfoExtractor):
+ IE_NAME = 'tv2.hu'
+ _VALID_URL = r'https?://(?:www\.)?tv2\.hu/(?:[^/]+/)+(?P<id>\d+)_[^/?#]+?\.html'
+ _TESTS = [{
+ 'url': 'http://tv2.hu/ezek_megorultek/217679_ezek-megorultek---1.-adas-1.-resz.html',
+ 'md5': '585e58e2e090f34603804bb2c48e98d8',
+ 'info_dict': {
+ 'id': '217679',
+ 'ext': 'mp4',
+ 'title': 'Ezek megőrültek! - 1. adás 1. rész',
+ 'upload_date': '20160826',
+ 'thumbnail': r're:^https?://.*\.jpg$'
+ }
+ }, {
+ 'url': 'http://tv2.hu/ezek_megorultek/teljes_adasok/217677_ezek-megorultek---1.-adas-2.-resz.html',
+ 'only_matching': True
+ }, {
+ 'url': 'http://tv2.hu/musoraink/aktiv/aktiv_teljes_adas/217963_aktiv-teljes-adas---2016.08.30..html',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ json_url = self._search_regex(
+ r'jsonUrl\s*=\s*"([^"]+)"', webpage, 'json url')
+ json_data = self._download_json(json_url, video_id)
+
+ formats = []
+ for b in ('bitrates', 'backupBitrates'):
+ bitrates = json_data.get(b, {})
+ m3u8_url = bitrates.get('hls')
+ if m3u8_url:
+ formats.extend(self._extract_wowza_formats(
+ m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp']))
+
+ for mp4_url in bitrates.get('mp4', []):
+ height = int_or_none(self._search_regex(
+ r'\.(\d+)p\.mp4', mp4_url, 'height', default=None))
+ formats.append({
+ 'format_id': 'http' + ('-%d' % height if height else ''),
+ 'url': mp4_url,
+ 'height': height,
+ 'width': int_or_none(height / 9.0 * 16.0 if height else None),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage).strip(),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'upload_date': self._search_regex(
+ r'/vod/(\d{8})/', json_url, 'upload_date', default=None),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/tv4.py b/hypervideo_dl/extractor/tv4.py
new file mode 100644
index 0000000..b73bab9
--- /dev/null
+++ b/hypervideo_dl/extractor/tv4.py
@@ -0,0 +1,128 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class TV4IE(InfoExtractor):
+ IE_DESC = 'tv4.se and tv4play.se'
+ _VALID_URL = r'''(?x)https?://(?:www\.)?
+ (?:
+ tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
+ tv4play\.se/
+ (?:
+ (?:program|barn)/(?:(?:[^/]+/){1,2}|(?:[^\?]+)\?video_id=)|
+ iframe/video/|
+ film/|
+ sport/|
+ )
+ )(?P<id>[0-9]+)'''
+ _GEO_COUNTRIES = ['SE']
+ _TESTS = [
+ {
+ 'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
+ 'md5': 'cb837212f342d77cec06e6dad190e96d',
+ 'info_dict': {
+ 'id': '2491650',
+ 'ext': 'mp4',
+ 'title': 'Kalla Fakta 5 (english subtitles)',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': int,
+ 'upload_date': '20131125',
+ },
+ },
+ {
+ 'url': 'http://www.tv4play.se/iframe/video/3054113',
+ 'md5': 'cb837212f342d77cec06e6dad190e96d',
+ 'info_dict': {
+ 'id': '3054113',
+ 'ext': 'mp4',
+ 'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
+ 'timestamp': int,
+ 'upload_date': '20150130',
+ },
+ },
+ {
+ 'url': 'http://www.tv4play.se/sport/3060959',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.tv4play.se/film/2378136',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.tv4play.se/program/farang/3922081',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.tv4play.se/program/nyheterna/avsnitt/13315940',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = self._download_json(
+ 'https://playback-api.b17g.net/asset/%s' % video_id,
+ video_id, 'Downloading video info JSON', query={
+ 'service': 'tv4',
+ 'device': 'browser',
+ 'protocol': 'hls,dash',
+ 'drm': 'widevine',
+ })['metadata']
+
+ title = info['title']
+
+ manifest_url = self._download_json(
+ 'https://playback-api.b17g.net/media/' + video_id,
+ video_id, query={
+ 'service': 'tv4',
+ 'device': 'browser',
+ 'protocol': 'hls',
+ })['playbackItem']['manifestUrl']
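+ # Only the HLS manifest URL is returned; the DASH, HDS and Smooth
+ # Streaming manifests are assumed to sit at the same path, so derive
+ # them by rewriting the URL (each attempt is non-fatal).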
+ formats = self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(self._extract_mpd_formats(
+ manifest_url.replace('.m3u8', '.mpd'),
+ video_id, mpd_id='dash', fatal=False))
+ formats.extend(self._extract_f4m_formats(
+ manifest_url.replace('.m3u8', '.f4m'),
+ video_id, f4m_id='hds', fatal=False))
+ formats.extend(self._extract_ism_formats(
+ re.sub(r'\.ism/.*?\.m3u8', r'.ism/Manifest', manifest_url),
+ video_id, ism_id='mss', fatal=False))
+
+ if not formats and info.get('is_geo_restricted'):
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ # 'subtitles': subtitles,
+ 'description': info.get('description'),
+ 'timestamp': parse_iso8601(info.get('broadcast_date_time')),
+ 'duration': int_or_none(info.get('duration')),
+ 'thumbnail': info.get('image'),
+ 'is_live': info.get('isLive') is True,
+ 'series': info.get('seriesTitle'),
+ 'season_number': int_or_none(info.get('seasonNumber')),
+ 'episode': info.get('episodeTitle'),
+ 'episode_number': int_or_none(info.get('episodeNumber')),
+ }
diff --git a/hypervideo_dl/extractor/tv5mondeplus.py b/hypervideo_dl/extractor/tv5mondeplus.py
new file mode 100644
index 0000000..b7fe082
--- /dev/null
+++ b/hypervideo_dl/extractor/tv5mondeplus.py
@@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ int_or_none,
+ parse_duration,
+)
+
+
+class TV5MondePlusIE(InfoExtractor):
+ IE_DESC = 'TV5MONDE+'
+ _VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # movie
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/rendez-vous-a-atlit',
+ 'md5': '8cbde5ea7b296cf635073e27895e227f',
+ 'info_dict': {
+ 'id': '822a4756-0712-7329-1859-a13ac7fd1407',
+ 'display_id': 'rendez-vous-a-atlit',
+ 'ext': 'mp4',
+ 'title': 'Rendez-vous à Atlit',
+ 'description': 'md5:2893a4c5e1dbac3eedff2d87956e4efb',
+ 'upload_date': '20200130',
+ },
+ }, {
+ # series episode
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/c-est-la-vie-ennemie-juree',
+ 'info_dict': {
+ 'id': '0df7007c-4900-3936-c601-87a13a93a068',
+ 'display_id': 'c-est-la-vie-ennemie-juree',
+ 'ext': 'mp4',
+ 'title': "C'est la vie - Ennemie jurée",
+ 'description': 'md5:dfb5c63087b6f35fe0cc0af4fe44287e',
+ 'upload_date': '20200130',
+ 'series': "C'est la vie",
+ 'episode': 'Ennemie jurée',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/info-societe/le-journal-de-la-rts-edition-du-30-01-20-19h30',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
+ self.raise_geo_restricted(countries=['FR'])
+
+ title = episode = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
+ vpl_data = extract_attributes(self._search_regex(
+ r'(<[^>]+class="video_player_loader"[^>]+>)',
+ webpage, 'video player loader'))
+
+ video_files = self._parse_json(
+ vpl_data['data-broadcast'], display_id).get('files', [])
+ formats = []
+ for video_file in video_files:
+ v_url = video_file.get('url')
+ if not v_url:
+ continue
+ video_format = video_file.get('format') or determine_ext(v_url)
+ if video_format == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ v_url, display_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': v_url,
+ 'format_id': video_format,
+ })
+ self._sort_formats(formats)
+
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage,
+ 'description', fatal=False)
+
+ series = self._html_search_regex(
+ r'<p[^>]+class=["\']episode-emission[^>]+>([^<]+)', webpage,
+ 'series', default=None)
+
+ if series and series != title:
+ title = '%s - %s' % (series, title)
+
+ upload_date = self._search_regex(
+ r'(?:date_publication|publish_date)["\']\s*:\s*["\'](\d{4}_\d{2}_\d{2})',
+ webpage, 'upload date', default=None)
+ if upload_date:
+ upload_date = upload_date.replace('_', '')
+
+ video_id = self._search_regex(
+ (r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+ r'id_contenu["\']\s:\s*(\d+)'), webpage, 'video id',
+ default=display_id)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': vpl_data.get('data-image'),
+ 'duration': int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage)),
+ 'upload_date': upload_date,
+ 'formats': formats,
+ 'series': series,
+ 'episode': episode,
+ }
diff --git a/hypervideo_dl/extractor/tv5unis.py b/hypervideo_dl/extractor/tv5unis.py
new file mode 100644
index 0000000..eabdc22
--- /dev/null
+++ b/hypervideo_dl/extractor/tv5unis.py
@@ -0,0 +1,121 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_age_limit,
+ smuggle_url,
+ try_get,
+)
+
+
+class TV5UnisBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['CA']
+
+ def _real_extract(self, url):
+ groups = re.match(self._VALID_URL, url).groups()
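+ # Product metadata comes from a GraphQL query sent as a plain GET
+ # parameter; subclasses supply the query name and argument builder.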
+ product = self._download_json(
+ 'https://api.tv5unis.ca/graphql', groups[0], query={
+ 'query': '''{
+ %s(%s) {
+ collection {
+ title
+ }
+ episodeNumber
+ rating {
+ name
+ }
+ seasonNumber
+ tags
+ title
+ videoElement {
+ ... on Video {
+ mediaId
+ }
+ }
+ }
+}''' % (self._GQL_QUERY_NAME, self._gql_args(groups)),
+ })['data'][self._GQL_QUERY_NAME]
+ media_id = product['videoElement']['mediaId']
+
+ return {
+ '_type': 'url_transparent',
+ 'id': media_id,
+ 'title': product.get('title'),
+ 'url': smuggle_url('limelight:media:' + media_id, {'geo_countries': self._GEO_COUNTRIES}),
+ 'age_limit': parse_age_limit(try_get(product, lambda x: x['rating']['name'])),
+ 'tags': product.get('tags'),
+ 'series': try_get(product, lambda x: x['collection']['title']),
+ 'season_number': int_or_none(product.get('seasonNumber')),
+ 'episode_number': int_or_none(product.get('episodeNumber')),
+ 'ie_key': 'LimelightMedia',
+ }
+
+
+class TV5UnisVideoIE(TV5UnisBaseIE):
+ IE_NAME = 'tv5unis:video'
+ _VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/[^/]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.tv5unis.ca/videos/bande-annonces/71843',
+ 'md5': '3d794164928bda97fb87a17e89923d9b',
+ 'info_dict': {
+ 'id': 'a883684aecb2486cad9bdc7bbe17f861',
+ 'ext': 'mp4',
+ 'title': 'Watatatow',
+ 'duration': 10.01,
+ }
+ }
+ _GQL_QUERY_NAME = 'productById'
+
+ @staticmethod
+ def _gql_args(groups):
+ return 'id: %s' % groups
+
+
+class TV5UnisIE(TV5UnisBaseIE):
+ IE_NAME = 'tv5unis'
+ _VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/(?P<id>[^/]+)(?:/saisons/(?P<season_number>\d+)/episodes/(?P<episode_number>\d+))?/?(?:[?#&]|$)'
+ _TESTS = [{
+ 'url': 'https://www.tv5unis.ca/videos/watatatow/saisons/6/episodes/1',
+ 'md5': 'a479907d2e531a73e1f8dc48d6388d02',
+ 'info_dict': {
+ 'id': 'e5ee23a586c44612a56aad61accf16ef',
+ 'ext': 'mp4',
+ 'title': 'Je ne peux pas lui résister',
+ 'description': "Atys, le nouveau concierge de l'école, a réussi à ébranler la confiance de Mado en affirmant qu\'une médaille, ce n'est que du métal. Comme Mado essaie de lui prouver que ses valeurs sont solides, il veut la mettre à l'épreuve...",
+ 'subtitles': {
+ 'fr': 'count:1',
+ },
+ 'duration': 1370,
+ 'age_limit': 8,
+ 'tags': 'count:3',
+ 'series': 'Watatatow',
+ 'season_number': 6,
+ 'episode_number': 1,
+ },
+ }, {
+ 'url': 'https://www.tv5unis.ca/videos/le-voyage-de-fanny',
+ 'md5': '9ca80ebb575c681d10cae1adff3d4774',
+ 'info_dict': {
+ 'id': '726188eefe094d8faefb13381d42bc06',
+ 'ext': 'mp4',
+ 'title': 'Le voyage de Fanny',
+ 'description': "Fanny, 12 ans, cachée dans un foyer loin de ses parents, s'occupe de ses deux soeurs. Devant fuir, Fanny prend la tête d'un groupe de huit enfants et s'engage dans un dangereux périple à travers la France occupée pour rejoindre la frontière suisse.",
+ 'subtitles': {
+ 'fr': 'count:1',
+ },
+ 'duration': 5587.034,
+ 'tags': 'count:4',
+ },
+ }]
+ _GQL_QUERY_NAME = 'productByRootProductSlug'
+
+ @staticmethod
+ def _gql_args(groups):
+ args = 'rootProductSlug: "%s"' % groups[0]
+ if groups[1]:
+ args += ', seasonNumber: %s, episodeNumber: %s' % groups[1:]
+ return args
diff --git a/hypervideo_dl/extractor/tva.py b/hypervideo_dl/extractor/tva.py
new file mode 100644
index 0000000..52a4ddf
--- /dev/null
+++ b/hypervideo_dl/extractor/tva.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ smuggle_url,
+ strip_or_none,
+)
+
+
+class TVAIE(InfoExtractor):
+ _VALID_URL = r'https?://videos?\.tva\.ca/details/_(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://videos.tva.ca/details/_5596811470001',
+ 'info_dict': {
+ 'id': '5596811470001',
+ 'ext': 'mp4',
+ 'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !',
+ 'uploader_id': '5481942443001',
+ 'upload_date': '20171003',
+ 'timestamp': 1507064617,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ }, {
+ 'url': 'https://video.tva.ca/details/_5596811470001',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
+ 'ie_key': 'BrightcoveNew',
+ }
+
+
+class QubIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?qub\.ca/(?:[^/]+/)*[0-9a-z-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.qub.ca/tvaplus/tva/alerte-amber/saison-1/episode-01-1000036619',
+ 'md5': '949490fd0e7aee11d0543777611fbd53',
+ 'info_dict': {
+ 'id': '6084352463001',
+ 'ext': 'mp4',
+ 'title': 'Épisode 01',
+ 'uploader_id': '5481942443001',
+ 'upload_date': '20190907',
+ 'timestamp': 1567899756,
+ 'description': 'md5:9c0d7fbb90939420c651fd977df90145',
+ },
+ }, {
+ 'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
+ 'only_matching': True,
+ }]
+ # reference_id also works with the old account_id (5481942443001)
+ # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5813221784001/default_default/index.html?videoId=ref:%s'
+
+ def _real_extract(self, url):
+ entity_id = self._match_id(url)
+ entity = self._download_json(
+ 'https://www.qub.ca/proxy/pfu/content-delivery-service/v1/entities',
+ entity_id, query={'id': entity_id})
+ video_id = entity['videoId']
+ episode = strip_or_none(entity.get('name'))
+
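+ # Delegate playback to TVAIE via url_transparent; it resolves the
+ # Brightcove stream while this dict supplies the metadata.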
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': episode,
+ # 'url': self.BRIGHTCOVE_URL_TEMPLATE % entity['referenceId'],
+ 'url': 'https://videos.tva.ca/details/_' + video_id,
+ 'description': entity.get('longDescription'),
+ 'duration': float_or_none(entity.get('durationMillis'), 1000),
+ 'episode': episode,
+ 'episode_number': int_or_none(entity.get('episodeNumber')),
+ # 'ie_key': 'BrightcoveNew',
+ 'ie_key': TVAIE.ie_key(),
+ }
diff --git a/hypervideo_dl/extractor/tvanouvelles.py b/hypervideo_dl/extractor/tvanouvelles.py
new file mode 100644
index 0000000..1086176
--- /dev/null
+++ b/hypervideo_dl/extractor/tvanouvelles.py
@@ -0,0 +1,65 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
+
+
+class TVANouvellesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/videos/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.tvanouvelles.ca/videos/5117035533001',
+ 'info_dict': {
+ 'id': '5117035533001',
+ 'ext': 'mp4',
+ 'title': 'L’industrie du taxi dénonce l’entente entre Québec et Uber: explications',
+ 'description': 'md5:479653b7c8cf115747bf5118066bd8b3',
+ 'uploader_id': '1741764581',
+ 'timestamp': 1473352030,
+ 'upload_date': '20160908',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1741764581/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ brightcove_id = self._match_id(url)
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ BrightcoveNewIE.ie_key(), brightcove_id)
+
+
+class TVANouvellesArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/(?:[^/]+/)+(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://www.tvanouvelles.ca/2016/11/17/des-policiers-qui-ont-la-meche-un-peu-courte',
+ 'info_dict': {
+ 'id': 'des-policiers-qui-ont-la-meche-un-peu-courte',
+ 'title': 'Des policiers qui ont «la mèche un peu courte»?',
+ 'description': 'md5:92d363c8eb0f0f030de9a4a84a90a3a0',
+ },
+ 'playlist_mincount': 4,
+ }
+
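+    # Article pages share the URL space with direct video pages; defer to
+    # TVANouvellesIE whenever its more specific pattern matches.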
+ @classmethod
+ def suitable(cls, url):
+ return False if TVANouvellesIE.suitable(url) else super(TVANouvellesArticleIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ entries = [
+ self.url_result(
+ 'http://www.tvanouvelles.ca/videos/%s' % mobj.group('id'),
+ ie=TVANouvellesIE.ie_key(), video_id=mobj.group('id'))
+ for mobj in re.finditer(
+ r'data-video-id=(["\'])?(?P<id>\d+)', webpage)]
+
+ title = self._og_search_title(webpage, fatal=False)
+ description = self._og_search_description(webpage)
+
+ return self.playlist_result(entries, display_id, title, description)
diff --git a/hypervideo_dl/extractor/tvc.py b/hypervideo_dl/extractor/tvc.py
new file mode 100644
index 0000000..008f64c
--- /dev/null
+++ b/hypervideo_dl/extractor/tvc.py
@@ -0,0 +1,110 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+)
+
+
+class TVCIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.tvc.ru/video/iframe/id/74622/isPlay/false/id_stat/channel/?acc_video_id=/channel/brand/id/17/show/episodes/episode_id/39702',
+ 'md5': 'bbc5ff531d1e90e856f60fc4b3afd708',
+ 'info_dict': {
+ 'id': '74622',
+ 'ext': 'mp4',
+ 'title': 'События. "События". Эфир от 22.05.2015 14:30',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1122,
+ },
+ }
+
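+    # Used by the generic extractor to discover embedded TVC players.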
+ @classmethod
+ def _extract_url(cls, webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:http:)?//(?:www\.)?tvc\.ru/video/iframe/id/[^"]+)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://www.tvc.ru/video/json/id/%s' % video_id, video_id)
+
+ formats = []
+ for info in video.get('path', {}).get('quality', []):
+ video_url = info.get('url')
+ if not video_url:
+ continue
+ format_id = self._search_regex(
+ r'cdnvideo/([^/]+?)(?:-[^/]+?)?/', video_url,
+ 'format id', default=None)
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'width': int_or_none(info.get('width')),
+ 'height': int_or_none(info.get('height')),
+ 'tbr': int_or_none(info.get('bitrate')),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video['title'],
+ 'thumbnail': video.get('picture'),
+ 'duration': int_or_none(video.get('duration')),
+ 'formats': formats,
+ }
+
+
+class TVCArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)'
+ _TESTS = [{
+ 'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/',
+ 'info_dict': {
+ 'id': '74622',
+ 'ext': 'mp4',
+ 'title': 'События. "События". Эфир от 22.05.2015 14:30',
+ 'description': 'md5:ad7aa7db22903f983e687b8a3e98c6dd',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1122,
+ },
+ }, {
+ 'url': 'http://www.tvc.ru/news/show/id/69944',
+ 'info_dict': {
+ 'id': '75399',
+ 'ext': 'mp4',
+ 'title': 'Эксперты: в столице встал вопрос о максимально безопасных остановках',
+ 'description': 'md5:f2098f71e21f309e89f69b525fd9846e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 278,
+ },
+ }, {
+ 'url': 'http://www.tvc.ru/channel/brand/id/47/show/episodes#',
+ 'info_dict': {
+ 'id': '2185',
+ 'ext': 'mp4',
+ 'title': 'Ещё не поздно. Эфир от 03.08.2013',
+ 'description': 'md5:51fae9f3f8cfe67abce014e428e5b027',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 3316,
+ },
+ }]
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, self._match_id(url))
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'TVC',
+ 'url': self._og_search_video_url(webpage),
+ 'title': clean_html(self._og_search_title(webpage)),
+ 'description': clean_html(self._og_search_description(webpage)),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
diff --git a/hypervideo_dl/extractor/tver.py b/hypervideo_dl/extractor/tver.py
new file mode 100644
index 0000000..a4a30b1
--- /dev/null
+++ b/hypervideo_dl/extractor/tver.py
@@ -0,0 +1,63 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ remove_start,
+ smuggle_url,
+ try_get,
+)
+
+
+class TVerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?P<path>(?:corner|episode|feature)/(?P<id>f?\d+))'
+ # videos are only available for 7 days
+ _TESTS = [{
+ 'url': 'https://tver.jp/corner/f0062178',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tver.jp/feature/f0062413',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tver.jp/episode/79622438',
+ 'only_matching': True,
+ }, {
+ # subtitle = ' '
+ 'url': 'https://tver.jp/corner/f0068870',
+ 'only_matching': True,
+ }]
+ _TOKEN = None
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+
+ def _real_initialize(self):
+ self._TOKEN = self._download_json(
+ 'https://tver.jp/api/access_token.php', None)['token']
+
+ def _real_extract(self, url):
+ path, video_id = re.match(self._VALID_URL, url).groups()
+ main = self._download_json(
+ 'https://api.tver.jp/v4/' + path, video_id,
+ query={'token': self._TOKEN})['main']
+ p_id = main['publisher_id']
+ service = remove_start(main['service'], 'ts_')
+
+ r_id = main['reference_id']
+ if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'):
+ r_id = 'ref:' + r_id
+ bc_url = smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
+ {'geo_countries': ['JP']})
+
+ return {
+ '_type': 'url_transparent',
+ 'description': try_get(main, lambda x: x['note'][0]['text'], compat_str),
+ 'episode_number': int_or_none(try_get(main, lambda x: x['ext']['episode_number'])),
+ 'url': bc_url,
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/hypervideo_dl/extractor/tvigle.py b/hypervideo_dl/extractor/tvigle.py
new file mode 100644
index 0000000..180259a
--- /dev/null
+++ b/hypervideo_dl/extractor/tvigle.py
@@ -0,0 +1,140 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_age_limit,
+ try_get,
+ url_or_none,
+)
+
+
+class TvigleIE(InfoExtractor):
+ IE_NAME = 'tvigle'
+ IE_DESC = 'Интернет-телевидение Tvigle.ru'
+ _VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))'
+
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['RU']
+
+ _TESTS = [
+ {
+ 'url': 'http://www.tvigle.ru/video/sokrat/',
+ 'info_dict': {
+ 'id': '1848932',
+ 'display_id': 'sokrat',
+ 'ext': 'mp4',
+ 'title': 'Сократ',
+ 'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17',
+ 'duration': 6586,
+ 'age_limit': 12,
+ },
+ 'skip': 'georestricted',
+ },
+ {
+ 'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/',
+ 'info_dict': {
+ 'id': '5142516',
+ 'ext': 'flv',
+ 'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
+ 'description': 'md5:027f7dc872948f14c96d19b4178428a4',
+ 'duration': 186.080,
+ 'age_limit': 0,
+ },
+ 'skip': 'georestricted',
+ }, {
+ 'url': 'https://cloud.tvigle.ru/video/5267604/',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ if not video_id:
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._html_search_regex(
+ (r'<div[^>]+class=["\']player["\'][^>]+id=["\'](\d+)',
+ r'cloudId\s*=\s*["\'](\d+)',
+ r'class="video-preview current_playing" id="(\d+)"'),
+ webpage, 'video id')
+
+ video_data = self._download_json(
+ 'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id)
+
+ item = video_data['playlist']['items'][0]
+
+ videos = item.get('videos')
+
+ error_message = item.get('errorMessage')
+ if not videos and error_message:
+ if item.get('isGeoBlocked') is True:
+ self.raise_geo_restricted(
+ msg=error_message, countries=self._GEO_COUNTRIES)
+ else:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error_message),
+ expected=True)
+
+ title = item['title']
+ description = item.get('description')
+ thumbnail = item.get('thumbnail')
+ duration = float_or_none(item.get('durationMilliseconds'), 1000)
+ age_limit = parse_age_limit(item.get('ageRestrictions'))
+
+ formats = []
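+        # 'videos' maps a codec/protocol key ('hls', 'dash' or a plain codec
+        # name) to either a manifest URL or a {format_id: url} dict.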
+        for vcodec, url_or_fmts in (videos or {}).items():
+ if vcodec == 'hls':
+ m3u8_url = url_or_none(url_or_fmts)
+ if not m3u8_url:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif vcodec == 'dash':
+ mpd_url = url_or_none(url_or_fmts)
+ if not mpd_url:
+ continue
+ formats.extend(self._extract_mpd_formats(
+ mpd_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ if not isinstance(url_or_fmts, dict):
+ continue
+ for format_id, video_url in url_or_fmts.items():
+ if format_id == 'm3u8':
+ continue
+ video_url = url_or_none(video_url)
+ if not video_url:
+ continue
+ height = self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None)
+ filesize = int_or_none(try_get(
+ item, lambda x: x['video_files_size'][vcodec][format_id]))
+ formats.append({
+ 'url': video_url,
+ 'format_id': '%s-%s' % (vcodec, format_id),
+ 'vcodec': vcodec,
+ 'height': int_or_none(height),
+ 'filesize': filesize,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/tvland.py b/hypervideo_dl/extractor/tvland.py
new file mode 100644
index 0000000..7911441
--- /dev/null
+++ b/hypervideo_dl/extractor/tvland.py
@@ -0,0 +1,37 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .spike import ParamountNetworkIE
+
+
+class TVLandIE(ParamountNetworkIE):
+ IE_NAME = 'tvland.com'
+ _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
+ _FEED_URL = 'http://www.tvland.com/feeds/mrss/'
+ _TESTS = [{
+        # Geo-restricted. Without a proxy the metadata is still available;
+        # with a proxy the request redirects to http://m.tvland.com/app/
+ 'url': 'https://www.tvland.com/episodes/s04pzf/everybody-loves-raymond-the-dog-season-1-ep-19',
+ 'info_dict': {
+ 'description': 'md5:84928e7a8ad6649371fbf5da5e1ad75a',
+ 'title': 'The Dog',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ 'url': 'https://www.tvland.com/video-clips/4n87f2/younger-a-first-look-at-younger-season-6',
+ 'md5': 'e2c6389401cf485df26c79c247b08713',
+ 'info_dict': {
+ 'id': '891f7d3c-5b5b-4753-b879-b7ba1a601757',
+ 'ext': 'mp4',
+ 'title': 'Younger|April 30, 2019|6|NO-EPISODE#|A First Look at Younger Season 6',
+ 'description': 'md5:595ea74578d3a888ae878dfd1c7d4ab2',
+ 'upload_date': '20190430',
+ 'timestamp': 1556658000,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301',
+ 'only_matching': True,
+ }]
diff --git a/hypervideo_dl/extractor/tvn24.py b/hypervideo_dl/extractor/tvn24.py
new file mode 100644
index 0000000..de0fb50
--- /dev/null
+++ b/hypervideo_dl/extractor/tvn24.py
@@ -0,0 +1,105 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ NO_DEFAULT,
+ unescapeHTML,
+)
+
+
+class TVN24IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
+ 'md5': 'fbdec753d7bc29d96036808275f2130c',
+ 'info_dict': {
+ 'id': '1584444',
+ 'ext': 'mp4',
+ 'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
+ 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.',
+ 'thumbnail': 're:https?://.*[.]jpeg',
+ }
+ }, {
+ # different layout
+ 'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html',
+ 'info_dict': {
+ 'id': '1771763',
+ 'ext': 'mp4',
+ 'title': 'Pnącza, ptaki i iglaki (odc. 691 /HGTV odc. 29)',
+ 'thumbnail': 're:https?://.*',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://sport.tvn24.pl/pilka-nozna,105/ligue-1-kamil-glik-rozcial-glowe-monaco-tylko-remisuje-z-bastia,716522.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._search_regex(
+ r'<h\d+[^>]+class=["\']magazineItemHeader[^>]+>(.+?)</h',
+ webpage, 'title')
+
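+        # JSON payloads are embedded in HTML data-* attributes and are
+        # HTML-escaped, hence transform_source=unescapeHTML below.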
+ def extract_json(attr, name, default=NO_DEFAULT, fatal=True):
+ return self._parse_json(
+ self._search_regex(
+ r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
+ name, group='json', default=default, fatal=fatal) or '{}',
+ display_id, transform_source=unescapeHTML, fatal=fatal)
+
+ quality_data = extract_json('data-quality', 'formats')
+
+ formats = []
+        for format_id, format_url in quality_data.items():
+            formats.append({
+                'url': format_url,
+ 'format_id': format_id,
+ 'height': int_or_none(format_id.rstrip('p')),
+ })
+ self._sort_formats(formats)
+
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._html_search_regex(
+ r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage,
+ 'thumbnail', group='url')
+
+ video_id = None
+
+ share_params = extract_json(
+ 'data-share-params', 'share params', default=None)
+ if isinstance(share_params, dict):
+ video_id = share_params.get('id')
+
+ if not video_id:
+ video_id = self._search_regex(
+ r'data-vid-id=["\'](\d+)', webpage, 'video id',
+ default=None) or self._search_regex(
+ r',(\d+)\.html', url, 'video id', default=display_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/tvnet.py b/hypervideo_dl/extractor/tvnet.py
new file mode 100644
index 0000000..4222ff9
--- /dev/null
+++ b/hypervideo_dl/extractor/tvnet.py
@@ -0,0 +1,147 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unescapeHTML,
+ url_or_none,
+)
+
+
+class TVNetIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+)\.tvnet\.gov\.vn/[^/]+/(?:\d+/)?(?P<id>\d+)(?:/|$)'
+ _TESTS = [{
+ # video
+ 'url': 'http://de.tvnet.gov.vn/video/109788/vtv1---bac-tuyet-tai-lao-cai-va-ha-giang/tin-nong-24h',
+ 'md5': 'b4d7abe0252c9b47774760b7519c7558',
+ 'info_dict': {
+ 'id': '109788',
+ 'ext': 'mp4',
+ 'title': 'VTV1 - Bắc tuyết tại Lào Cai và Hà Giang',
+ 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+ 'is_live': False,
+ 'view_count': int,
+ },
+ }, {
+ # audio
+ 'url': 'http://vn.tvnet.gov.vn/radio/27017/vov1---ban-tin-chieu-10062018/doi-song-va-xa-hoi',
+ 'md5': 'b5875ce9b0a2eecde029216d0e6db2ae',
+ 'info_dict': {
+ 'id': '27017',
+ 'ext': 'm4a',
+ 'title': 'VOV1 - Bản tin chiều (10/06/2018)',
+ 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+ 'is_live': False,
+ },
+ }, {
+ 'url': 'http://us.tvnet.gov.vn/video/118023/129999/ngay-0705',
+ 'info_dict': {
+ 'id': '129999',
+ 'ext': 'mp4',
+ 'title': 'VTV1 - Quốc hội với cử tri (11/06/2018)',
+ 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+ 'is_live': False,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # live stream
+ 'url': 'http://us.tvnet.gov.vn/kenh-truyen-hinh/1011/vtv1',
+ 'info_dict': {
+ 'id': '1011',
+ 'ext': 'mp4',
+ 'title': r're:^VTV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # radio live stream
+ 'url': 'http://vn.tvnet.gov.vn/kenh-truyen-hinh/1014',
+ 'info_dict': {
+ 'id': '1014',
+ 'ext': 'm4a',
+ 'title': r're:VOV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://us.tvnet.gov.vn/phim/6136/25510/vtv3---ca-mot-doi-an-oan-tap-1-50/phim-truyen-hinh',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'title', webpage, default=None) or self._search_regex(
+ r'<title>([^<]+)<', webpage, 'title')
+ title = re.sub(r'\s*-\s*TV Net\s*$', '', title)
+
+ if '/video/' in url or '/radio/' in url:
+ is_live = False
+ elif '/kenh-truyen-hinh/' in url:
+ is_live = True
+ else:
+ is_live = None
+
+ data_file = unescapeHTML(self._search_regex(
+ r'data-file=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
+ 'data file', group='url'))
+
+ stream_urls = set()
+ formats = []
+ for stream in self._download_json(data_file, video_id):
+ if not isinstance(stream, dict):
+ continue
+ stream_url = url_or_none(stream.get('url'))
+ if stream_url in stream_urls or not stream_url:
+ continue
+ stream_urls.add(stream_url)
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, 'mp4',
+ entry_protocol='m3u8' if is_live else 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ # better support for radio streams
+ if title.startswith('VOV'):
+ for f in formats:
+ f.update({
+ 'ext': 'm4a',
+ 'vcodec': 'none',
+ })
+
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or unescapeHTML(
+ self._search_regex(
+ r'data-image=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
+ 'thumbnail', default=None, group='url'))
+
+ if is_live:
+ title = self._live_title(title)
+
+ view_count = int_or_none(self._search_regex(
+ r'(?s)<div[^>]+\bclass=["\'].*?view-count[^>]+>.*?(\d+).*?</div>',
+ webpage, 'view count', default=None))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'is_live': is_live,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/tvnoe.py b/hypervideo_dl/extractor/tvnoe.py
new file mode 100644
index 0000000..26a5aea
--- /dev/null
+++ b/hypervideo_dl/extractor/tvnoe.py
@@ -0,0 +1,48 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ get_element_by_class,
+ js_to_json,
+)
+
+
+class TVNoeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.tvnoe.cz/video/10362',
+ 'md5': 'aee983f279aab96ec45ab6e2abb3c2ca',
+ 'info_dict': {
+ 'id': '10362',
+ 'ext': 'mp4',
+ 'series': 'Noční univerzita',
+ 'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací',
+ 'description': 'md5:f337bae384e1a531a52c55ebc50fff41',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ iframe_url = self._search_regex(
+ r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL')
+
+ ifs_page = self._download_webpage(iframe_url, video_id)
+ jwplayer_data = self._find_jwplayer_data(
+ ifs_page, video_id, transform_source=js_to_json)
+ info_dict = self._parse_jwplayer_data(
+ jwplayer_data, video_id, require_title=False, base_url=iframe_url)
+
+ info_dict.update({
+ 'id': video_id,
+ 'title': clean_html(get_element_by_class(
+ 'field-name-field-podnazev', webpage)),
+ 'description': clean_html(get_element_by_class(
+ 'field-name-body', webpage)),
+ 'series': clean_html(get_element_by_class('title', webpage))
+ })
+
+ return info_dict
diff --git a/hypervideo_dl/extractor/tvnow.py b/hypervideo_dl/extractor/tvnow.py
new file mode 100644
index 0000000..9c8a8a0
--- /dev/null
+++ b/hypervideo_dl/extractor/tvnow.py
@@ -0,0 +1,497 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ parse_duration,
+ str_or_none,
+ update_url_query,
+ urljoin,
+)
+
+
+class TVNowBaseIE(InfoExtractor):
+ _VIDEO_FIELDS = (
+ 'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
+ 'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
+ 'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
+ 'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')
+
+ def _call_api(self, path, video_id, query):
+ return self._download_json(
+ 'https://api.tvnow.de/v3/' + path, video_id, query=query)
+
+ def _extract_video(self, info, display_id):
+ video_id = compat_str(info['id'])
+ title = info['title']
+
+ paths = []
+ for manifest_url in (info.get('manifest') or {}).values():
+ if not manifest_url:
+ continue
+ manifest_url = update_url_query(manifest_url, {'filter': ''})
+ path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
+ if path in paths:
+ continue
+ paths.append(path)
+
+ def url_repl(proto, suffix):
+ return re.sub(
+ r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
+ r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
+ '.ism/' + suffix, manifest_url))
+
+ def make_urls(proto, suffix):
+ urls = [url_repl(proto, suffix)]
+ hd_url = urls[0].replace('/manifest/', '/ngvod/')
+ if hd_url != urls[0]:
+ urls.append(hd_url)
+ return urls
+
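+            # For example, a (hypothetical) manifest URL like
+            #   https://host/manifest/hls-abc/clip.ism/clip.m3u8
+            # is rewritten by url_repl('dash', '.mpd') to
+            #   https://host/manifest/dash-abc/clip.ism/.mpd
+            # while make_urls() additionally tries an /ngvod/ (HD) variant.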
+            formats = []
+            for man_url in make_urls('dash', '.mpd'):
+                formats.extend(self._extract_mpd_formats(
+                    man_url, video_id, mpd_id='dash', fatal=False))
+ for man_url in make_urls('hss', 'Manifest'):
+ formats.extend(self._extract_ism_formats(
+ man_url, video_id, ism_id='mss', fatal=False))
+ for man_url in make_urls('hls', '.m3u8'):
+ formats.extend(self._extract_m3u8_formats(
+ man_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
+ fatal=False))
+ if formats:
+ break
+ else:
+ if info.get('isDrm'):
+ raise ExtractorError(
+ 'Video %s is DRM protected' % video_id, expected=True)
+ if info.get('geoblocked'):
+                self.raise_geo_restricted()
+ if not info.get('free', True):
+ raise ExtractorError(
+ 'Video %s is not available for free' % video_id, expected=True)
+ self._sort_formats(formats)
+
+ description = info.get('articleLong') or info.get('articleShort')
+ timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
+ duration = parse_duration(info.get('duration'))
+
+ f = info.get('format', {})
+
+ thumbnails = [{
+ 'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
+ }]
+ thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
+ if thumbnail:
+ thumbnails.append({
+ 'url': thumbnail,
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnails': thumbnails,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'series': f.get('title'),
+ 'season_number': int_or_none(info.get('season')),
+ 'episode_number': int_or_none(info.get('episode')),
+ 'episode': title,
+ 'formats': formats,
+ }
+
+
+class TVNowIE(TVNowBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
+ (?P<show_id>[^/]+)/
+ (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
+ '''
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url) or TVNowShowIE.suitable(url)
+ else super(TVNowIE, cls).suitable(url))
+
+ _TESTS = [{
+ 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
+ 'info_dict': {
+ 'id': '331082',
+ 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
+ 'ext': 'mp4',
+ 'title': 'Der neue Porsche 911 GT 3',
+ 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
+ 'timestamp': 1495994400,
+ 'upload_date': '20170528',
+ 'duration': 5283,
+ 'series': 'GRIP - Das Motormagazin',
+ 'season_number': 14,
+ 'episode_number': 405,
+ 'episode': 'Der neue Porsche 911 GT 3',
+ },
+ }, {
+ # rtl2
+ 'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
+ 'only_matching': True,
+ }, {
+ # rtlnitro
+ 'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
+ 'only_matching': True,
+ }, {
+ # superrtl
+ 'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
+ 'only_matching': True,
+ }, {
+ # ntv
+ 'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
+ 'only_matching': True,
+ }, {
+ # vox
+ 'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
+ 'only_matching': True,
+ }, {
+ # rtlplus
+ 'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+        display_id = '%s/%s' % mobj.group('show_id', 'id')
+
+ info = self._call_api(
+ 'movies/' + display_id, display_id, query={
+ 'fields': ','.join(self._VIDEO_FIELDS),
+ })
+
+ return self._extract_video(info, display_id)
+
+
+class TVNowNewIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?P<base_url>https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/
+ (?:shows|serien))/
+ (?P<show>[^/]+)-\d+/
+ [^/]+/
+ episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+)
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url'))
+ show, episode = mobj.group('show', 'episode')
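+        # e.g. the test URL above becomes
+        # https://www.tvnow.de/_/grip-das-motormagazin/der-neue-porsche-911-gt-3,
+        # which the old-style TVNowIE pattern matches.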
+ return self.url_result(
+            # Rewrite new URLs to the old format and extract via the old API
+            # at api.tvnow.de as a loophole to bypass premium content checks
+ '%s/%s/%s' % (base_url, show, episode),
+ ie=TVNowIE.ie_key(), video_id=mobj.group('id'))
+
+
+class TVNowNewBaseIE(InfoExtractor):
+ def _call_api(self, path, video_id, query={}):
+ result = self._download_json(
+ 'https://apigw.tvnow.de/module/' + path, video_id, query=query)
+ error = result.get('error')
+ if error:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error), expected=True)
+ return result
+
+
+r"""
+TODO: new apigw.tvnow.de-based version of TVNowIE. Replace the old TVNowIE with
+it when api.tvnow.de is shut down. This version can't bypass premium checks, though.
+class TVNowIE(TVNowNewBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/
+ (?:shows|serien)/[^/]+/
+ (?:[^/]+/)+
+ (?P<display_id>[^/?$&]+)-(?P<id>\d+)
+ '''
+
+ _TESTS = [{
+ # episode with annual navigation
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
+ 'info_dict': {
+ 'id': '331082',
+ 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
+ 'ext': 'mp4',
+ 'title': 'Der neue Porsche 911 GT 3',
+ 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1495994400,
+ 'upload_date': '20170528',
+ 'duration': 5283,
+ 'series': 'GRIP - Das Motormagazin',
+ 'season_number': 14,
+ 'episode_number': 405,
+ 'episode': 'Der neue Porsche 911 GT 3',
+ },
+ }, {
+ # rtl2, episode with season navigation
+ 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124',
+ 'only_matching': True,
+ }, {
+ # rtlnitro
+ 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822',
+ 'only_matching': True,
+ }, {
+ # superrtl
+ 'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120',
+ 'only_matching': True,
+ }, {
+ # ntv
+ 'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630',
+ 'only_matching': True,
+ }, {
+ # vox
+ 'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
+ 'only_matching': True,
+ }]
+
+ def _extract_video(self, info, url, display_id):
+ config = info['config']
+ source = config['source']
+
+ video_id = compat_str(info.get('id') or source['videoId'])
+ title = source['title'].strip()
+
+ paths = []
+ for manifest_url in (info.get('manifest') or {}).values():
+ if not manifest_url:
+ continue
+ manifest_url = update_url_query(manifest_url, {'filter': ''})
+ path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
+ if path in paths:
+ continue
+ paths.append(path)
+
+ def url_repl(proto, suffix):
+ return re.sub(
+ r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
+ r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
+ '.ism/' + suffix, manifest_url))
+
+ formats = self._extract_mpd_formats(
+ url_repl('dash', '.mpd'), video_id,
+ mpd_id='dash', fatal=False)
+ formats.extend(self._extract_ism_formats(
+ url_repl('hss', 'Manifest'),
+ video_id, ism_id='mss', fatal=False))
+ formats.extend(self._extract_m3u8_formats(
+ url_repl('hls', '.m3u8'), video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+ if formats:
+ break
+ else:
+ if try_get(info, lambda x: x['rights']['isDrm']):
+ raise ExtractorError(
+ 'Video %s is DRM protected' % video_id, expected=True)
+ if try_get(config, lambda x: x['boards']['geoBlocking']['block']):
+                self.raise_geo_restricted()
+ if not info.get('free', True):
+ raise ExtractorError(
+ 'Video %s is not available for free' % video_id, expected=True)
+ self._sort_formats(formats)
+
+ description = source.get('description')
+ thumbnail = url_or_none(source.get('poster'))
+ timestamp = unified_timestamp(source.get('previewStart'))
+ duration = parse_duration(source.get('length'))
+
+ series = source.get('format')
+ season_number = int_or_none(self._search_regex(
+ r'staffel-(\d+)', url, 'season number', default=None))
+ episode_number = int_or_none(self._search_regex(
+ r'episode-(\d+)', url, 'episode number', default=None))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'episode': title,
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ display_id, video_id = re.match(self._VALID_URL, url).groups()
+ info = self._call_api('player/' + video_id, video_id)
+ return self._extract_video(info, video_id, display_id)
+"""
+
+
+class TVNowListBaseIE(TVNowNewBaseIE):
+ _SHOW_VALID_URL = r'''(?x)
+ (?P<base_url>
+ https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/
+ [^/?#&]+-(?P<show_id>\d+)
+ )
+ '''
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if TVNowNewIE.suitable(url)
+ else super(TVNowListBaseIE, cls).suitable(url))
+
+ def _extract_items(self, url, show_id, list_id, query):
+ items = self._call_api(
+ 'teaserrow/format/episode/' + show_id, list_id,
+ query=query)['items']
+
+ entries = []
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ item_url = urljoin(url, item.get('url'))
+ if not item_url:
+ continue
+ video_id = str_or_none(item.get('id') or item.get('videoId'))
+ item_title = item.get('subheadline') or item.get('text')
+ entries.append(self.url_result(
+ item_url, ie=TVNowNewIE.ie_key(), video_id=video_id,
+ video_title=item_title))
+
+ return self.playlist_result(entries, '%s/%s' % (show_id, list_id))
+
+
+class TVNowSeasonIE(TVNowListBaseIE):
+ _VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL
+ _TESTS = [{
+ 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13',
+ 'info_dict': {
+ 'id': '1815/13',
+ },
+ 'playlist_mincount': 22,
+ }]
+
+ def _real_extract(self, url):
+ _, show_id, season_id = re.match(self._VALID_URL, url).groups()
+ return self._extract_items(
+ url, show_id, season_id, {'season': season_id})
+
+
+class TVNowAnnualIE(TVNowListBaseIE):
+ _VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL
+ _TESTS = [{
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05',
+ 'info_dict': {
+ 'id': '1669/2017-05',
+ },
+ 'playlist_mincount': 2,
+ }]
+
+ def _real_extract(self, url):
+ _, show_id, year, month = re.match(self._VALID_URL, url).groups()
+ return self._extract_items(
+ url, show_id, '%s-%s' % (year, month), {
+ 'year': int(year),
+ 'month': int(month),
+ })
+
+
+class TVNowShowIE(TVNowListBaseIE):
+ _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
+ _TESTS = [{
+ # annual navigationType
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669',
+ 'info_dict': {
+ 'id': '1669',
+ },
+ 'playlist_mincount': 73,
+ }, {
+ # season navigationType
+ 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471',
+ 'info_dict': {
+ 'id': '11471',
+ },
+ 'playlist_mincount': 3,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url)
+ else super(TVNowShowIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ base_url, show_id = re.match(self._VALID_URL, url).groups()
+
+ result = self._call_api(
+ 'teaserrow/format/navigation/' + show_id, show_id)
+
+ items = result['items']
+
+ entries = []
+ navigation = result.get('navigationType')
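+        # A show is paginated either by year/month ('annual') or by season
+        # ('season'); emit one playlist entry per month or per season page.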
+ if navigation == 'annual':
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ year = int_or_none(item.get('year'))
+ if year is None:
+ continue
+ months = item.get('months')
+ if not isinstance(months, list):
+ continue
+ for month_dict in months:
+ if not isinstance(month_dict, dict) or not month_dict:
+ continue
+ month_number = int_or_none(list(month_dict.keys())[0])
+ if month_number is None:
+ continue
+ entries.append(self.url_result(
+ '%s/%04d-%02d' % (base_url, year, month_number),
+ ie=TVNowAnnualIE.ie_key()))
+ elif navigation == 'season':
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ season_number = int_or_none(item.get('season'))
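+        # Brightcove reference ids need a 'ref:' prefix; the services listed
+        # below appear to use plain video ids instead.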
+ if season_number is None:
+ continue
+ entries.append(self.url_result(
+ '%s/staffel-%d' % (base_url, season_number),
+ ie=TVNowSeasonIE.ie_key()))
+ else:
+ raise ExtractorError('Unknown navigationType')
+
+ return self.playlist_result(entries, show_id)
diff --git a/hypervideo_dl/extractor/tvp.py b/hypervideo_dl/extractor/tvp.py
new file mode 100644
index 0000000..accff75
--- /dev/null
+++ b/hypervideo_dl/extractor/tvp.py
@@ -0,0 +1,257 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ get_element_by_attribute,
+ orderedSet,
+)
+
+
+class TVPIE(InfoExtractor):
+ IE_NAME = 'tvp'
+ IE_DESC = 'Telewizja Polska'
+ _VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:video/(?:[^,\s]*,)*|(?:(?!\d+/)[^/]+/)*)(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
+ 'md5': 'a21eb0aa862f25414430f15fdfb9e76c',
+ 'info_dict': {
+ 'id': '194536',
+ 'ext': 'mp4',
+ 'title': 'Czas honoru, odc. 13 – Władek',
+ 'description': 'md5:437f48b93558370b031740546b696e24',
+ },
+ }, {
+ 'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
+ 'md5': 'b0005b542e5b4de643a9690326ab1257',
+ 'info_dict': {
+ 'id': '17916176',
+ 'ext': 'mp4',
+ 'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
+ 'description': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
+ },
+ }, {
+    # page id is not the same as video id (#7799)
+ 'url': 'https://wiadomosci.tvp.pl/33908820/28092017-1930',
+ 'md5': '84cd3c8aec4840046e5ab712416b73d0',
+ 'info_dict': {
+ 'id': '33908820',
+ 'ext': 'mp4',
+ 'title': 'Wiadomości, 28.09.2017, 19:30',
+ 'description': 'Wydanie główne codziennego serwisu informacyjnego.'
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ }, {
+ 'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+ video_id = self._search_regex([
+ r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
+ r"object_id\s*:\s*'(\d+)'",
+ r'data-video-id="(\d+)"'], webpage, 'video id', default=page_id)
+ return {
+ '_type': 'url_transparent',
+ 'url': 'tvp:' + video_id,
+ 'description': self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'description', webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'ie_key': 'TVPEmbed',
+ }
+
+
+class TVPEmbedIE(InfoExtractor):
+ IE_NAME = 'tvp:embed'
+ IE_DESC = 'Telewizja Polska'
+ _VALID_URL = r'(?:tvp:|https?://[^/]+\.tvp\.(?:pl|info)/sess/tvplayer\.php\?.*?object_id=)(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'tvp:194536',
+ 'md5': 'a21eb0aa862f25414430f15fdfb9e76c',
+ 'info_dict': {
+ 'id': '194536',
+ 'ext': 'mp4',
+ 'title': 'Czas honoru, odc. 13 – Władek',
+ },
+ }, {
+ # not available
+ 'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268',
+ 'md5': '8c9cd59d16edabf39331f93bf8a766c7',
+ 'info_dict': {
+ 'id': '22670268',
+ 'ext': 'mp4',
+ 'title': 'Panorama, 07.12.2015, 15:40',
+ },
+ 'skip': 'Transmisja została zakończona lub materiał niedostępny',
+ }, {
+ 'url': 'tvp:22670268',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
+
+ error = self._html_search_regex(
+ r'(?s)<p[^>]+\bclass=["\']notAvailable__text["\'][^>]*>(.+?)</p>',
+ webpage, 'error', default=None) or clean_html(
+ get_element_by_attribute('class', 'msg error', webpage))
+ if error:
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, clean_html(error)), expected=True)
+
+ title = self._search_regex(
+ r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
+ webpage, 'title', group='title')
+ series_title = self._search_regex(
+ r'name\s*:\s*([\'"])SeriesTitle\1\s*,\s*value\s*:\s*\1(?P<series>.+?)\1',
+ webpage, 'series', group='series', default=None)
+ if series_title:
+ title = '%s, %s' % (series_title, title)
+
+ thumbnail = self._search_regex(
+ r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None)
+
+ video_url = self._search_regex(
+ r'0:{src:([\'"])(?P<url>.*?)\1', webpage,
+ 'formats', group='url', default=None)
+ if not video_url or 'material_niedostepny.mp4' in video_url:
+ video_url = self._download_json(
+ 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
+ video_id)['video_url']
+
+ formats = []
+ video_url_base = self._search_regex(
+ r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)',
+ video_url, 'video base url', default=None)
+ if video_url_base:
+            # TODO: <Group> found instead of <AdaptationSet> in the MPD manifest.
+            # It's not mentioned in the MPEG-DASH standard. Figure that out.
+ # formats.extend(self._extract_mpd_formats(
+ # video_url_base + '.ism/video.mpd',
+ # video_id, mpd_id='dash', fatal=False))
+ formats.extend(self._extract_ism_formats(
+ video_url_base + '.ism/Manifest',
+ video_id, 'mss', fatal=False))
+ formats.extend(self._extract_f4m_formats(
+ video_url_base + '.ism/video.f4m',
+ video_id, f4m_id='hds', fatal=False))
+ m3u8_formats = self._extract_m3u8_formats(
+ video_url_base + '.ism/video.m3u8', video_id,
+ 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ self._sort_formats(m3u8_formats)
+ m3u8_formats = list(filter(
+ lambda f: f.get('vcodec') != 'none', m3u8_formats))
+ formats.extend(m3u8_formats)
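+            # TVP usually serves progressive MP4 counterparts of the HLS
+            # variants at <base>-<n>.mp4, numbered from 2; each candidate is
+            # probed with _is_valid_url() before being added.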
+ for i, m3u8_format in enumerate(m3u8_formats, 2):
+ http_url = '%s-%d.mp4' % (video_url_base, i)
+ if self._is_valid_url(http_url, video_id):
+ f = m3u8_format.copy()
+ f.update({
+ 'url': http_url,
+ 'format_id': f['format_id'].replace('hls', 'http'),
+ 'protocol': 'http',
+ })
+ formats.append(f)
+ else:
+ formats = [{
+ 'format_id': 'direct',
+ 'url': video_url,
+ 'ext': determine_ext(video_url, 'mp4'),
+ }]
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
+
+
+class TVPWebsiteIE(InfoExtractor):
+ IE_NAME = 'tvp:series'
+ _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)'
+
+ _TESTS = [{
+ # series
+ 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312/video',
+ 'info_dict': {
+ 'id': '38678312',
+ },
+ 'playlist_count': 115,
+ }, {
+ # film
+ 'url': 'https://vod.tvp.pl/website/gloria,35139666',
+ 'info_dict': {
+ 'id': '36637049',
+ 'ext': 'mp4',
+ 'title': 'Gloria, Gloria',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['TVPEmbed'],
+ }, {
+ 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312',
+ 'only_matching': True,
+ }]
+
+ def _entries(self, display_id, playlist_id):
+ url = 'https://vod.tvp.pl/website/%s,%s/video' % (display_id, playlist_id)
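+        # Walk the paginated episode listing until a page yields no new
+        # video links.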
+ for page_num in itertools.count(1):
+ page = self._download_webpage(
+ url, display_id, 'Downloading page %d' % page_num,
+ query={'page': page_num})
+
+ video_ids = orderedSet(re.findall(
+ r'<a[^>]+\bhref=["\']/video/%s,[^,]+,(\d+)' % display_id,
+ page))
+
+ if not video_ids:
+ break
+
+ for video_id in video_ids:
+ yield self.url_result(
+ 'tvp:%s' % video_id, ie=TVPEmbedIE.ie_key(),
+ video_id=video_id)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id, playlist_id = mobj.group('display_id', 'id')
+ return self.playlist_result(
+ self._entries(display_id, playlist_id), playlist_id)
diff --git a/hypervideo_dl/extractor/tvplay.py b/hypervideo_dl/extractor/tvplay.py
new file mode 100644
index 0000000..0d858c0
--- /dev/null
+++ b/hypervideo_dl/extractor/tvplay.py
@@ -0,0 +1,497 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ qualities,
+ try_get,
+ update_url_query,
+ url_or_none,
+ urljoin,
+)
+
+
+class TVPlayIE(InfoExtractor):
+ IE_NAME = 'mtg'
+ IE_DESC = 'MTG services'
+ _VALID_URL = r'''(?x)
+ (?:
+ mtg:|
+ https?://
+ (?:www\.)?
+ (?:
+ tvplay(?:\.skaties)?\.lv(?:/parraides)?|
+ (?:tv3play|play\.tv3)\.lt(?:/programos)?|
+ tv3play(?:\.tv3)?\.ee/sisu|
+ (?:tv(?:3|6|8|10)play|viafree)\.se/program|
+ (?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer|
+ play\.nova(?:tv)?\.bg/programi
+ )
+ /(?:[^/]+/)+
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [
+ {
+ 'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
+ 'md5': 'a1612fe0849455423ad8718fe049be21',
+ 'info_dict': {
+ 'id': '418113',
+ 'ext': 'mp4',
+ 'title': 'Kādi ir īri? - Viņas melo labāk',
+ 'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
+ 'series': 'Viņas melo labāk',
+ 'season': '2.sezona',
+ 'season_number': 2,
+ 'duration': 25,
+ 'timestamp': 1406097056,
+ 'upload_date': '20140723',
+ },
+ },
+ {
+ 'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
+ 'info_dict': {
+ 'id': '409229',
+ 'ext': 'flv',
+ 'title': 'Moterys meluoja geriau',
+ 'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e',
+ 'series': 'Moterys meluoja geriau',
+ 'episode_number': 47,
+ 'season': '1 sezonas',
+ 'season_number': 1,
+ 'duration': 1330,
+ 'timestamp': 1403769181,
+ 'upload_date': '20140626',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.tv3play.ee/sisu/kodu-keset-linna/238551?autostart=true',
+ 'info_dict': {
+ 'id': '238551',
+ 'ext': 'flv',
+ 'title': 'Kodu keset linna 398537',
+ 'description': 'md5:7df175e3c94db9e47c0d81ffa5d68701',
+ 'duration': 1257,
+ 'timestamp': 1292449761,
+ 'upload_date': '20101215',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true',
+ 'info_dict': {
+ 'id': '395385',
+ 'ext': 'mp4',
+ 'title': 'Husräddarna S02E07',
+ 'description': 'md5:f210c6c89f42d4fc39faa551be813777',
+ 'duration': 2574,
+ 'timestamp': 1400596321,
+ 'upload_date': '20140520',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true',
+ 'info_dict': {
+ 'id': '266636',
+ 'ext': 'mp4',
+ 'title': 'Den sista dokusåpan S01E08',
+ 'description': 'md5:295be39c872520221b933830f660b110',
+ 'duration': 1492,
+ 'timestamp': 1330522854,
+ 'upload_date': '20120229',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true',
+ 'info_dict': {
+ 'id': '282756',
+ 'ext': 'mp4',
+ 'title': 'Antikjakten S01E10',
+ 'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8',
+ 'duration': 2646,
+ 'timestamp': 1348575868,
+ 'upload_date': '20120925',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true',
+ 'info_dict': {
+ 'id': '230898',
+ 'ext': 'mp4',
+ 'title': 'Anna Anka søker assistent - Ep. 8',
+ 'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474',
+ 'duration': 2656,
+ 'timestamp': 1277720005,
+ 'upload_date': '20100628',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true',
+ 'info_dict': {
+ 'id': '21873',
+ 'ext': 'mp4',
+ 'title': 'Budbringerne program 10',
+ 'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d',
+ 'duration': 1297,
+ 'timestamp': 1254205102,
+ 'upload_date': '20090929',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true',
+ 'info_dict': {
+ 'id': '361883',
+ 'ext': 'mp4',
+ 'title': 'Hotelinspektør Alex Polizzi - Ep. 10',
+ 'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81',
+ 'duration': 2594,
+ 'timestamp': 1393236292,
+ 'upload_date': '20140224',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://play.novatv.bg/programi/zdravei-bulgariya/624952?autostart=true',
+ 'info_dict': {
+ 'id': '624952',
+ 'ext': 'flv',
+ 'title': 'Здравей, България (12.06.2015 г.) ',
+ 'description': 'md5:99f3700451ac5bb71a260268b8daefd7',
+ 'duration': 8838,
+ 'timestamp': 1434100372,
+ 'upload_date': '20150612',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://play.nova.bg/programi/zdravei-bulgariya/764300?autostart=true',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://tvplay.skaties.lv/vinas-melo-labak/418113/?autostart=true',
+ 'only_matching': True,
+ },
+ {
+ # views is null
+ 'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'mtg:418113',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
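+        # Derive the geo bypass country from the site's TLD (e.g. .lv -> LV).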
+ geo_country = self._search_regex(
+ r'https?://[^/]+\.([a-z]{2})', url,
+ 'geo country', default=None)
+ if geo_country:
+ self._initialize_geo_bypass({'countries': [geo_country.upper()]})
+ video = self._download_json(
+ 'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')
+
+ title = video['title']
+
+ try:
+ streams = self._download_json(
+ 'http://playapi.mtgx.tv/v3/videos/stream/%s' % video_id,
+ video_id, 'Downloading streams JSON')
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ msg = self._parse_json(e.cause.read().decode('utf-8'), video_id)
+ raise ExtractorError(msg['msg'], expected=True)
+ raise
+
+ quality = qualities(['hls', 'medium', 'high'])
+ formats = []
+ for format_id, video_url in streams.get('streams', {}).items():
+ video_url = url_or_none(video_url)
+ if not video_url:
+ continue
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(video_url, {
+ 'hdcore': '3.5.0',
+ 'plugin': 'aasp-3.5.0.151.81'
+ }), video_id, f4m_id='hds', fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ fmt = {
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ 'ext': ext,
+ }
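+                # rtmp URLs must be split into the connection URL, the app
+                # and the play path for the RTMP downloader.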
+ if video_url.startswith('rtmp'):
+ m = re.search(
+ r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
+ if not m:
+ continue
+ fmt.update({
+ 'ext': 'flv',
+ 'url': m.group('url'),
+ 'app': m.group('app'),
+ 'play_path': m.group('playpath'),
+ 'preference': -1,
+ })
+ else:
+ fmt.update({
+ 'url': video_url,
+ })
+ formats.append(fmt)
+
+ if not formats and video.get('is_geo_blocked'):
+ self.raise_geo_restricted(
+                'This content might not be available in your country for copyright reasons')
+
+ self._sort_formats(formats)
+
+ # TODO: webvtt in m3u8
+ subtitles = {}
+ sami_path = video.get('sami_path')
+ if sami_path:
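+            # Guess the subtitle language from the SAMI file name, falling
+            # back to the site's TLD.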
+ lang = self._search_regex(
+ r'_([a-z]{2})\.xml', sami_path, 'lang',
+ default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1])
+ subtitles[lang] = [{
+ 'url': sami_path,
+ }]
+
+ series = video.get('format_title')
+ episode_number = int_or_none(video.get('format_position', {}).get('episode'))
+ season = video.get('_embedded', {}).get('season', {}).get('title')
+ season_number = int_or_none(video.get('format_position', {}).get('season'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'series': series,
+ 'episode_number': episode_number,
+ 'season': season,
+ 'season_number': season_number,
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': parse_iso8601(video.get('created_at')),
+ 'view_count': try_get(video, lambda x: x['views']['total'], int),
+ 'age_limit': int_or_none(video.get('age_limit', 0)),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class ViafreeIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ viafree\.(?P<country>dk|no|se)
+ /(?P<id>program(?:mer)?/(?:[^/]+/)+[^/?#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1',
+ 'info_dict': {
+ 'id': '757786',
+ 'ext': 'mp4',
+ 'title': 'Det beste vorspielet - Sesong 2 - Episode 1',
+ 'description': 'md5:b632cb848331404ccacd8cd03e83b4c3',
+ 'series': 'Det beste vorspielet',
+ 'season_number': 2,
+ 'duration': 1116,
+ 'timestamp': 1471200600,
+ 'upload_date': '20160814',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # with relatedClips
+ 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-1',
+ 'only_matching': True,
+ }, {
+ # Different og:image URL schema
+ 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+
+ @classmethod
+ def suitable(cls, url):
+ return False if TVPlayIE.suitable(url) else super(ViafreeIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ country, path = re.match(self._VALID_URL, url).groups()
+ content = self._download_json(
+ 'https://viafree-content.mtg-api.com/viafree-content/v1/%s/path/%s' % (country, path), path)
+ program = content['_embedded']['viafreeBlocks'][0]['_embedded']['program']
+ guid = program['guid']
+ meta = content['meta']
+ title = meta['title']
+
+ try:
+ stream_href = self._download_json(
+ program['_links']['streamLink']['href'], guid,
+ headers=self.geo_verification_headers())['embedded']['prioritizedStreams'][0]['links']['stream']['href']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ self.raise_geo_restricted(countries=[country])
+ raise
+
+ formats = self._extract_m3u8_formats(stream_href, guid, 'mp4')
+ self._sort_formats(formats)
+ episode = program.get('episode') or {}
+
+ return {
+ 'id': guid,
+ 'title': title,
+ 'thumbnail': meta.get('image'),
+ 'description': meta.get('description'),
+ 'series': episode.get('seriesTitle'),
+ 'episode_number': int_or_none(episode.get('episodeNumber')),
+ 'season_number': int_or_none(episode.get('seasonNumber')),
+ 'duration': int_or_none(try_get(program, lambda x: x['video']['duration']['milliseconds']), 1000),
+ 'timestamp': parse_iso8601(try_get(program, lambda x: x['availability']['start'])),
+ 'formats': formats,
+ }
+
+
+class TVPlayHomeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:tv3?)?play\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/(?:[^/]+/)*[^/?#&]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/',
+ 'info_dict': {
+ 'id': '366367',
+ 'ext': 'mp4',
+ 'title': 'Aferistai',
+ 'description': 'Aferistai. Kalėdinė pasaka.',
+ 'series': 'Aferistai [N-7]',
+ 'season': '1 sezonas',
+ 'season_number': 1,
+ 'duration': 464,
+ 'timestamp': 1394209658,
+ 'upload_date': '20140307',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://play.tv3.lt/aferistai-10047125',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv3play.skaties.lv/vinas-melo-labak-10280317',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://play.tv3.ee/cool-d-ga-mehhikosse-10044354',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ asset = self._download_json(
+ urljoin(url, '/sb/public/asset/' + video_id), video_id)
+
+ m3u8_url = asset['movie']['contentUrl']
+ video_id = asset['assetId']
+ asset_title = asset['title']
+ title = asset_title['title']
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ thumbnails = None
+ image_url = asset.get('imageUrl')
+ if image_url:
+ thumbnails = [{
+ 'url': urljoin(url, image_url),
+ 'ext': 'jpg',
+ }]
+
+ metadata = asset.get('metadata') or {}
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': asset_title.get('summaryLong') or asset_title.get('summaryShort'),
+ 'thumbnails': thumbnails,
+ 'duration': parse_duration(asset_title.get('runTime')),
+ 'series': asset.get('tvSeriesTitle'),
+ 'season': asset.get('tvSeasonTitle'),
+ 'season_number': int_or_none(metadata.get('seasonNumber')),
+ 'episode': asset_title.get('titleBrief'),
+ 'episode_number': int_or_none(metadata.get('episodeNumber')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/tvplayer.py b/hypervideo_dl/extractor/tvplayer.py
new file mode 100644
index 0000000..8f8686a
--- /dev/null
+++ b/hypervideo_dl/extractor/tvplayer.py
@@ -0,0 +1,86 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ extract_attributes,
+ try_get,
+ urlencode_postdata,
+ ExtractorError,
+)
+
+
+class TVPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'http://tvplayer.com/watch/bbcone',
+ 'info_dict': {
+ 'id': '89',
+ 'ext': 'mp4',
+ 'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ current_channel = extract_attributes(self._search_regex(
+ r'(<div[^>]+class="[^"]*current-channel[^"]*"[^>]*>)',
+ webpage, 'channel element'))
+ title = current_channel['data-name']
+
+ resource_id = current_channel['data-id']
+
+ token = self._search_regex(
+ r'data-token=(["\'])(?P<token>(?!\1).+)\1', webpage,
+ 'token', group='token')
+
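+ # The page-embedded token is first exchanged for a validation context, which the live-stream API expects back as 'validate'.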
+ context = self._download_json(
+ 'https://tvplayer.com/watch/context', display_id,
+ 'Downloading JSON context', query={
+ 'resource': resource_id,
+ 'gen': token,
+ })
+
+ validate = context['validate']
+ platform = try_get(
+ context, lambda x: x['platform']['key'], compat_str) or 'firefox'
+
+ try:
+ response = self._download_json(
+ 'http://api.tvplayer.com/api/v2/stream/live',
+ display_id, 'Downloading JSON stream', headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ }, data=urlencode_postdata({
+ 'id': resource_id,
+ 'service': 1,
+ 'platform': platform,
+ 'validate': validate,
+ }))['tvplayer']['response']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ response = self._parse_json(
+ e.cause.read().decode(), resource_id)['tvplayer']['response']
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
+ raise
+
+ formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4')
+ self._sort_formats(formats)
+
+ return {
+ 'id': resource_id,
+ 'display_id': display_id,
+ 'title': self._live_title(title),
+ 'formats': formats,
+ 'is_live': True,
+ }
diff --git a/hypervideo_dl/extractor/tweakers.py b/hypervideo_dl/extractor/tweakers.py
new file mode 100644
index 0000000..2b10d9b
--- /dev/null
+++ b/hypervideo_dl/extractor/tweakers.py
@@ -0,0 +1,62 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ determine_ext,
+ mimetype2ext,
+)
+
+
+class TweakersIE(InfoExtractor):
+ _VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
+ 'md5': 'fe73e417c093a788e0160c4025f88b15',
+ 'info_dict': {
+ 'id': '9926',
+ 'ext': 'mp4',
+ 'title': 'New Nintendo 3DS XL - Op alle fronten beter',
+ 'description': 'md5:3789b21fed9c0219e9bcaacd43fab280',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'duration': 386,
+ 'uploader_id': 's7JeEm',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'https://tweakers.net/video/s1playlist/%s/1920/1080/playlist.json' % video_id,
+ video_id)['items'][0]
+
+ title = video_data['title']
+
+ formats = []
+ for location in video_data.get('locations', {}).get('progressive', []):
+ format_id = location.get('label')
+ width = int_or_none(location.get('width'))
+ height = int_or_none(location.get('height'))
+ for source in location.get('sources', []):
+ source_url = source.get('src')
+ if not source_url:
+ continue
+ ext = mimetype2ext(source.get('type')) or determine_ext(source_url)
+ formats.append({
+ 'format_id': format_id,
+ 'url': source_url,
+ 'width': width,
+ 'height': height,
+ 'ext': ext,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('poster'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'uploader_id': video_data.get('account'),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/twentyfourvideo.py b/hypervideo_dl/extractor/twentyfourvideo.py
new file mode 100644
index 0000000..74d1404
--- /dev/null
+++ b/hypervideo_dl/extractor/twentyfourvideo.py
@@ -0,0 +1,133 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_iso8601,
+ int_or_none,
+ xpath_attr,
+ xpath_element,
+)
+
+
+class TwentyFourVideoIE(InfoExtractor):
+ IE_NAME = '24video'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?P<host>
+ (?:(?:www|porno?)\.)?24video\.
+ (?:net|me|xxx|sexy?|tube|adult|site|vip)
+ )/
+ (?:
+ video/(?:(?:view|xml)/)?|
+ player/new24_play\.swf\?id=
+ )
+ (?P<id>\d+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://www.24video.net/video/view/1044982',
+ 'md5': 'e09fc0901d9eaeedac872f154931deeb',
+ 'info_dict': {
+ 'id': '1044982',
+ 'ext': 'mp4',
+ 'title': 'Эротика каменного века',
+ 'description': 'Как смотрели порно в каменном веке.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'SUPERTELO',
+ 'duration': 31,
+ 'timestamp': 1275937857,
+ 'upload_date': '20100607',
+ 'age_limit': 18,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ }, {
+ 'url': 'http://www.24video.net/player/new24_play.swf?id=1044982',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.24video.me/video/view/1044982',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.24video.tube/video/view/2363750',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.24video.site/video/view/2640421',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://porno.24video.net/video/2640421-vsya-takaya-gibkaya-i-v-masle',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.24video.vip/video/view/1044982',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://porn.24video.net/video/2640421-vsya-takay',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ host = mobj.group('host')
+
+ webpage = self._download_webpage(
+ 'http://%s/video/view/%s' % (host, video_id), video_id)
+
+ title = self._og_search_title(webpage)
+ description = self._html_search_regex(
+ r'<(p|span)[^>]+itemprop="description"[^>]*>(?P<description>[^<]+)</\1>',
+ webpage, 'description', fatal=False, group='description')
+ thumbnail = self._og_search_thumbnail(webpage)
+ duration = int_or_none(self._og_search_property(
+ 'duration', webpage, 'duration', fatal=False))
+ timestamp = parse_iso8601(self._search_regex(
+ r'<time[^>]+\bdatetime="([^"]+)"[^>]+itemprop="uploadDate"',
+ webpage, 'upload date', fatal=False))
+
+ uploader = self._html_search_regex(
+ r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
+ webpage, 'uploader', fatal=False)
+
+ view_count = int_or_none(self._html_search_regex(
+ r'<span class="video-views">(\d+) просмотр',
+ webpage, 'view count', fatal=False))
+ comment_count = int_or_none(self._html_search_regex(
+ r'<a[^>]+href="#tab-comments"[^>]*>(\d+) комментари',
+ webpage, 'comment count', default=None))
+
+ # Sets some cookies
+ self._download_xml(
+ 'http://%s/video/xml/%s?mode=init' % (host, video_id),
+ video_id, 'Downloading init XML')
+
+ video_xml = self._download_xml(
+ 'http://%s/video/xml/%s?mode=play' % (host, video_id),
+ video_id, 'Downloading video XML')
+
+ video = xpath_element(video_xml, './/video', 'video', fatal=True)
+
+ formats = [{
+ 'url': xpath_attr(video, '', 'url', 'video URL', fatal=True),
+ }]
+
+ like_count = int_or_none(video.get('ratingPlus'))
+ dislike_count = int_or_none(video.get('ratingMinus'))
+ age_limit = 18 if video.get('adult') == 'true' else 0
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/twentymin.py b/hypervideo_dl/extractor/twentymin.py
new file mode 100644
index 0000000..a42977f
--- /dev/null
+++ b/hypervideo_dl/extractor/twentymin.py
@@ -0,0 +1,91 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ try_get,
+)
+
+
+class TwentyMinutenIE(InfoExtractor):
+ IE_NAME = '20min'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?20min\.ch/
+ (?:
+ videotv/*\?.*?\bvid=|
+ videoplayer/videoplayer\.html\?.*?\bvideoId@
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
+ 'md5': 'e7264320db31eed8c38364150c12496e',
+ 'info_dict': {
+ 'id': '469148',
+ 'ext': 'mp4',
+ 'title': '85 000 Franken für 15 perfekte Minuten',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629',
+ 'info_dict': {
+ 'id': '523629',
+ 'ext': 'mp4',
+ 'title': 'So kommen Sie bei Eis und Schnee sicher an',
+ 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [m.group('url') for m in re.finditer(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer\.html\?.*?\bvideoId@\d+.*?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://api.20min.ch/video/%s/show' % video_id,
+ video_id)['content']
+
+ title = video['title']
+
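+ # Both progressive renditions are derivable from the video id alone: the HD file adds an 'h' suffix in the podcast URL.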
+ formats = [{
+ 'format_id': format_id,
+ 'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p),
+ 'quality': quality,
+ } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])]
+ self._sort_formats(formats)
+
+ description = video.get('lead')
+ thumbnail = video.get('thumbnail')
+
+ def extract_count(kind):
+ return try_get(
+ video,
+ lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind]))
+
+ like_count = extract_count('up')
+ dislike_count = extract_count('down')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/twentythreevideo.py b/hypervideo_dl/extractor/twentythreevideo.py
new file mode 100644
index 0000000..dc56091
--- /dev/null
+++ b/hypervideo_dl/extractor/twentythreevideo.py
@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class TwentyThreeVideoIE(InfoExtractor):
+ IE_NAME = '23video'
+ _VALID_URL = r'https?://(?P<domain>[^.]+\.(?:twentythree\.net|23video\.com|filmweb\.no))/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)'
+ _TESTS = [{
+ 'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1',
+ 'md5': '75fcf216303eb1dae9920d651f85ced4',
+ 'info_dict': {
+ 'id': '20448876',
+ 'ext': 'mp4',
+ 'title': 'Video Marketing Minute: Personalized Video',
+ 'timestamp': 1513855354,
+ 'upload_date': '20171221',
+ 'uploader_id': '12258964',
+ 'uploader': 'Rasmus Bysted',
+ }
+ }, {
+ 'url': 'https://bonnier-publications-danmark.23video.com/v.ihtml/player.html?token=f0dc46476e06e13afd5a1f84a29e31e8&source=embed&photo%5fid=36137620',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, query, photo_id = re.match(self._VALID_URL, url).groups()
+ base_url = 'https://%s' % domain
+ photo_data = self._download_json(
+ base_url + '/api/photo/list?' + query, photo_id, query={
+ 'format': 'json',
+ }, transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'photo data'))['photo']
+ title = photo_data['title']
+
+ formats = []
+
+ audio_path = photo_data.get('audio_download')
+ if audio_path:
+ formats.append({
+ 'format_id': 'audio',
+ 'url': base_url + audio_path,
+ 'filesize': int_or_none(photo_data.get('audio_size')),
+ 'vcodec': 'none',
+ })
+
+ def add_common_info_to_list(l, template, id_field, id_value):
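+ # photo_data flattens per-rendition fields into keys like 'video_hd_download'/'video_hd_width'; 'template' rebuilds that prefix.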
+ f_base = template % id_value
+ f_path = photo_data.get(f_base + 'download')
+ if not f_path:
+ return
+ l.append({
+ id_field: id_value,
+ 'url': base_url + f_path,
+ 'width': int_or_none(photo_data.get(f_base + 'width')),
+ 'height': int_or_none(photo_data.get(f_base + 'height')),
+ 'filesize': int_or_none(photo_data.get(f_base + 'size')),
+ })
+
+ for f in ('mobile_high', 'medium', 'hd', '1080p', '4k'):
+ add_common_info_to_list(formats, 'video_%s_', 'format_id', f)
+
+ thumbnails = []
+ for t in ('quad16', 'quad50', 'quad75', 'quad100', 'small', 'portrait', 'standard', 'medium', 'large', 'original'):
+ add_common_info_to_list(thumbnails, '%s_', 'id', t)
+
+ return {
+ 'id': photo_id,
+ 'title': title,
+ 'timestamp': int_or_none(photo_data.get('creation_date_epoch')),
+ 'duration': int_or_none(photo_data.get('video_length')),
+ 'view_count': int_or_none(photo_data.get('view_count')),
+ 'comment_count': int_or_none(photo_data.get('number_of_comments')),
+ 'uploader_id': photo_data.get('user_id'),
+ 'uploader': photo_data.get('display_name'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/twitcasting.py b/hypervideo_dl/extractor/twitcasting.py
new file mode 100644
index 0000000..6596eef
--- /dev/null
+++ b/hypervideo_dl/extractor/twitcasting.py
@@ -0,0 +1,111 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ float_or_none,
+ get_element_by_class,
+ get_element_by_id,
+ parse_duration,
+ str_to_int,
+ unified_timestamp,
+ urlencode_postdata,
+)
+
+
+class TwitCastingIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609',
+ 'md5': '745243cad58c4681dc752490f7540d7f',
+ 'info_dict': {
+ 'id': '2357609',
+ 'ext': 'mp4',
+ 'title': 'Live #2357609',
+ 'uploader_id': 'ivetesangalo',
+ 'description': 'Twitter Oficial da cantora brasileira Ivete Sangalo.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20110822',
+ 'timestamp': 1314010824,
+ 'duration': 32,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://twitcasting.tv/mttbernardini/movie/3689740',
+ 'info_dict': {
+ 'id': '3689740',
+ 'ext': 'mp4',
+ 'title': 'Live playing something #3689740',
+ 'uploader_id': 'mttbernardini',
+ 'description': 'Salve, io sono Matto (ma con la e). Questa è la mia presentazione, in quanto sono letteralmente matto (nel senso di strano), con qualcosa in più.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20120212',
+ 'timestamp': 1329028024,
+ 'duration': 681,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ 'videopassword': 'abc',
+ },
+ }]
+
+ def _real_extract(self, url):
+ uploader_id, video_id = re.match(self._VALID_URL, url).groups()
+
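+ # Password-protected streams accept the --video-password value POSTed as an ordinary form field.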
+ video_password = self._downloader.params.get('videopassword')
+ request_data = None
+ if video_password:
+ request_data = urlencode_postdata({
+ 'password': video_password,
+ })
+ webpage = self._download_webpage(url, video_id, data=request_data)
+
+ title = clean_html(get_element_by_id(
+ 'movietitle', webpage)) or self._html_search_meta(
+ ['og:title', 'twitter:title'], webpage, fatal=True)
+
+ video_js_data = {}
+ m3u8_url = self._search_regex(
+ r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'm3u8 url', group='url', default=None)
+ if not m3u8_url:
+ video_js_data = self._parse_json(self._search_regex(
+ r"data-movie-playlist='(\[[^']+\])'",
+ webpage, 'movie playlist'), video_id)[0]
+ m3u8_url = video_js_data['source']['url']
+
+ # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', m3u8_id='hls')
+
+ thumbnail = video_js_data.get('thumbnailUrl') or self._og_search_thumbnail(webpage)
+ description = clean_html(get_element_by_id(
+ 'authorcomment', webpage)) or self._html_search_meta(
+ ['description', 'og:description', 'twitter:description'], webpage)
+ duration = float_or_none(video_js_data.get(
+ 'duration'), 1000) or parse_duration(clean_html(
+ get_element_by_class('tw-player-duration-time', webpage)))
+ view_count = str_to_int(self._search_regex(
+ r'Total\s*:\s*([\d,]+)\s*Views', webpage, 'views', None))
+ timestamp = unified_timestamp(self._search_regex(
+ r'data-toggle="true"[^>]+datetime="([^"]+)"',
+ webpage, 'datetime', None))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/twitch.py b/hypervideo_dl/extractor/twitch.py
new file mode 100644
index 0000000..a378bd6
--- /dev/null
+++ b/hypervideo_dl/extractor/twitch.py
@@ -0,0 +1,988 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import collections
+import itertools
+import json
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_str,
+ compat_urlparse,
+ compat_urllib_parse_urlencode,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ clean_html,
+ dict_get,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ qualities,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ urljoin,
+)
+
+
+class TwitchBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'
+
+ _API_BASE = 'https://api.twitch.tv'
+ _USHER_BASE = 'https://usher.ttvnw.net'
+ _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
+ _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
+ _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
+ _NETRC_MACHINE = 'twitch'
+
+ _OPERATION_HASHES = {
+ 'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
+ 'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
+ 'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
+ 'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
+ 'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
+ 'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
+ 'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11',
+ 'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
+ 'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
+ }
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ def fail(message):
+ raise ExtractorError(
+ 'Unable to login. Twitch said: %s' % message, expected=True)
+
+ def login_step(page, urlh, note, data):
+ form = self._hidden_inputs(page)
+ form.update(data)
+
+ page_url = urlh.geturl()
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
+ 'post url', default=self._LOGIN_POST_URL, group='url')
+ post_url = urljoin(page_url, post_url)
+
+ headers = {
+ 'Referer': page_url,
+ 'Origin': 'https://www.twitch.tv',
+ 'Content-Type': 'text/plain;charset=UTF-8',
+ }
+
+ response = self._download_json(
+ post_url, None, note, data=json.dumps(form).encode(),
+ headers=headers, expected_status=400)
+ error = dict_get(response, ('error', 'error_description', 'error_code'))
+ if error:
+ fail(error)
+
+ if 'Authenticated successfully' in response.get('message', ''):
+ return None, None
+
+ redirect_url = urljoin(
+ post_url,
+ response.get('redirect') or response['redirect_path'])
+ return self._download_webpage_handle(
+ redirect_url, None, 'Downloading login redirect page',
+ headers=headers)
+
+ login_page, handle = self._download_webpage_handle(
+ self._LOGIN_FORM_URL, None, 'Downloading login page')
+
+ # Some TOR nodes and public proxies are blocked completely
+ if 'blacklist_message' in login_page:
+ fail(clean_html(login_page))
+
+ redirect_page, handle = login_step(
+ login_page, handle, 'Logging in', {
+ 'username': username,
+ 'password': password,
+ 'client_id': self._CLIENT_ID,
+ })
+
+ # Successful login
+ if not redirect_page:
+ return
+
+ if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
+ # TODO: Add mechanism to request an SMS or phone call
+ tfa_token = self._get_tfa_info('two-factor authentication token')
+ login_step(redirect_page, handle, 'Submitting TFA token', {
+ 'authy_token': tfa_token,
+ 'remember_2fa': 'true',
+ })
+
+ def _prefer_source(self, formats):
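+ # Rank the original-quality rendition first: newer playlists label it 'Source', while some only reveal it via a /chunked/ path.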
+ try:
+ source = next(f for f in formats if f['format_id'] == 'Source')
+ source['quality'] = 10
+ except StopIteration:
+ for f in formats:
+ if '/chunked/' in f['url']:
+ f.update({
+ 'quality': 10,
+ 'format_note': 'Source',
+ })
+ self._sort_formats(formats)
+
+ def _download_base_gql(self, video_id, ops, note, fatal=True):
+ headers = {
+ 'Content-Type': 'text/plain;charset=UTF-8',
+ 'Client-ID': self._CLIENT_ID,
+ }
+ gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token')
+ if gql_auth:
+ headers['Authorization'] = 'OAuth ' + gql_auth.value
+ return self._download_json(
+ 'https://gql.twitch.tv/gql', video_id, note,
+ data=json.dumps(ops).encode(),
+ headers=headers, fatal=fatal)
+
+ def _download_gql(self, video_id, ops, note, fatal=True):
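+ # Persisted queries: the server resolves each operation by the SHA-256 hash registered in _OPERATION_HASHES instead of a full query body.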
+ for op in ops:
+ op['extensions'] = {
+ 'persistedQuery': {
+ 'version': 1,
+ 'sha256Hash': self._OPERATION_HASHES[op['operationName']],
+ }
+ }
+ return self._download_base_gql(video_id, ops, note)
+
+ def _download_access_token(self, video_id, token_kind, param_name):
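+ # token_kind is 'video' or 'stream', param_name 'id' or 'channelName'; the returned value/signature pair authorizes the Usher playlist request.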
+ method = '%sPlaybackAccessToken' % token_kind
+ ops = {
+ 'query': '''{
+ %s(
+ %s: "%s",
+ params: {
+ platform: "web",
+ playerBackend: "mediaplayer",
+ playerType: "site"
+ }
+ )
+ {
+ value
+ signature
+ }
+ }''' % (method, param_name, video_id),
+ }
+ return self._download_base_gql(
+ video_id, ops,
+ 'Downloading %s access token GraphQL' % token_kind)['data'][method]
+
+
+class TwitchVodIE(TwitchBaseIE):
+ IE_NAME = 'twitch:vod'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
+ player\.twitch\.tv/\?.*?\bvideo=v?
+ )
+ (?P<id>\d+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
+ 'info_dict': {
+ 'id': 'v6528877',
+ 'ext': 'mp4',
+ 'title': 'LCK Summer Split - Week 6 Day 1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 17208,
+ 'timestamp': 1435131734,
+ 'upload_date': '20150624',
+ 'uploader': 'Riot Games',
+ 'uploader_id': 'riotgames',
+ 'view_count': int,
+ 'start_time': 310,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # Untitled broadcast (title is None)
+ 'url': 'http://www.twitch.tv/belkao_o/v/11230755',
+ 'info_dict': {
+ 'id': 'v11230755',
+ 'ext': 'mp4',
+ 'title': 'Untitled Broadcast',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1638,
+ 'timestamp': 1439746708,
+ 'upload_date': '20150816',
+ 'uploader': 'BelkAO_o',
+ 'uploader_id': 'belkao_o',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ }, {
+ 'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.twitch.tv/videos/6528877',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.twitch.tv/northernlion/video/291940395',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://player.twitch.tv/?video=480452374',
+ 'only_matching': True,
+ }]
+
+ def _download_info(self, item_id):
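+ # Fetch VOD metadata via the VideoMetadata persisted query; a missing 'video' node means the VOD no longer exists.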
+ data = self._download_gql(
+ item_id, [{
+ 'operationName': 'VideoMetadata',
+ 'variables': {
+ 'channelLogin': '',
+ 'videoID': item_id,
+ },
+ }],
+ 'Downloading stream metadata GraphQL')[0]['data']
+ video = data.get('video')
+ if video is None:
+ raise ExtractorError(
+ 'Video %s does not exist' % item_id, expected=True)
+ return self._extract_info_gql(video, item_id)
+
+ @staticmethod
+ def _extract_info(info):
+ status = info.get('status')
+ if status == 'recording':
+ is_live = True
+ elif status == 'recorded':
+ is_live = False
+ else:
+ is_live = None
+ _QUALITIES = ('small', 'medium', 'large')
+ quality_key = qualities(_QUALITIES)
+ thumbnails = []
+ preview = info.get('preview')
+ if isinstance(preview, dict):
+ for thumbnail_id, thumbnail_url in preview.items():
+ thumbnail_url = url_or_none(thumbnail_url)
+ if not thumbnail_url:
+ continue
+ if thumbnail_id not in _QUALITIES:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'preference': quality_key(thumbnail_id),
+ })
+ return {
+ 'id': info['_id'],
+ 'title': info.get('title') or 'Untitled Broadcast',
+ 'description': info.get('description'),
+ 'duration': int_or_none(info.get('length')),
+ 'thumbnails': thumbnails,
+ 'uploader': info.get('channel', {}).get('display_name'),
+ 'uploader_id': info.get('channel', {}).get('name'),
+ 'timestamp': parse_iso8601(info.get('recorded_at')),
+ 'view_count': int_or_none(info.get('views')),
+ 'is_live': is_live,
+ }
+
+ @staticmethod
+ def _extract_info_gql(info, item_id):
+ vod_id = info.get('id') or item_id
+ # id backward compatibility for download archives
+ if vod_id[0] != 'v':
+ vod_id = 'v%s' % vod_id
+ thumbnail = url_or_none(info.get('previewThumbnailURL'))
+ if thumbnail:
+ for p in ('width', 'height'):
+ thumbnail = thumbnail.replace('{%s}' % p, '0')
+ return {
+ 'id': vod_id,
+ 'title': info.get('title') or 'Untitled Broadcast',
+ 'description': info.get('description'),
+ 'duration': int_or_none(info.get('lengthSeconds')),
+ 'thumbnail': thumbnail,
+ 'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
+ 'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
+ 'timestamp': unified_timestamp(info.get('publishedAt')),
+ 'view_count': int_or_none(info.get('viewCount')),
+ }
+
+ def _real_extract(self, url):
+ vod_id = self._match_id(url)
+
+ info = self._download_info(vod_id)
+ access_token = self._download_access_token(vod_id, 'video', 'id')
+
+ formats = self._extract_m3u8_formats(
+ '%s/vod/%s.m3u8?%s' % (
+ self._USHER_BASE, vod_id,
+ compat_urllib_parse_urlencode({
+ 'allow_source': 'true',
+ 'allow_audio_only': 'true',
+ 'allow_spectre': 'true',
+ 'player': 'twitchweb',
+ 'playlist_include_framerate': 'true',
+ 'nauth': access_token['value'],
+ 'nauthsig': access_token['signature'],
+ })),
+ vod_id, 'mp4', entry_protocol='m3u8_native')
+
+ self._prefer_source(formats)
+ info['formats'] = formats
+
+ parsed_url = compat_urllib_parse_urlparse(url)
+ query = compat_parse_qs(parsed_url.query)
+ if 't' in query:
+ info['start_time'] = parse_duration(query['t'][0])
+
+ if info.get('timestamp') is not None:
+ info['subtitles'] = {
+ 'rechat': [{
+ 'url': update_url_query(
+ 'https://api.twitch.tv/v5/videos/%s/comments' % vod_id, {
+ 'client_id': self._CLIENT_ID,
+ }),
+ 'ext': 'json',
+ }],
+ }
+
+ return info
+
+
+def _make_video_result(node):
+ assert isinstance(node, dict)
+ video_id = node.get('id')
+ if not video_id:
+ return
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': TwitchVodIE.ie_key(),
+ 'id': video_id,
+ 'url': 'https://www.twitch.tv/videos/%s' % video_id,
+ 'title': node.get('title'),
+ 'thumbnail': node.get('previewThumbnailURL'),
+ 'duration': float_or_none(node.get('lengthSeconds')),
+ 'view_count': int_or_none(node.get('viewCount')),
+ }
+
+
+class TwitchCollectionIE(TwitchBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.twitch.tv/collections/wlDCoH0zEBZZbQ',
+ 'info_dict': {
+ 'id': 'wlDCoH0zEBZZbQ',
+ 'title': 'Overthrow Nook, capitalism for children',
+ },
+ 'playlist_mincount': 13,
+ }]
+
+ _OPERATION_NAME = 'CollectionSideBar'
+
+ def _real_extract(self, url):
+ collection_id = self._match_id(url)
+ collection = self._download_gql(
+ collection_id, [{
+ 'operationName': self._OPERATION_NAME,
+ 'variables': {'collectionID': collection_id},
+ }],
+ 'Downloading collection GraphQL')[0]['data']['collection']
+ title = collection.get('title')
+ entries = []
+ for edge in collection['items']['edges']:
+ if not isinstance(edge, dict):
+ continue
+ node = edge.get('node')
+ if not isinstance(node, dict):
+ continue
+ video = _make_video_result(node)
+ if video:
+ entries.append(video)
+ return self.playlist_result(
+ entries, playlist_id=collection_id, playlist_title=title)
+
+
+class TwitchPlaylistBaseIE(TwitchBaseIE):
+ _PAGE_LIMIT = 100
+
+ def _entries(self, channel_name, *args):
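+ # Cursor-based pagination: remember the cursor of the last consumed edge and stop once a page yields no usable entries.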
+ cursor = None
+ variables_common = self._make_variables(channel_name, *args)
+ entries_key = '%ss' % self._ENTRY_KIND
+ for page_num in itertools.count(1):
+ variables = variables_common.copy()
+ variables['limit'] = self._PAGE_LIMIT
+ if cursor:
+ variables['cursor'] = cursor
+ page = self._download_gql(
+ channel_name, [{
+ 'operationName': self._OPERATION_NAME,
+ 'variables': variables,
+ }],
+ 'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num),
+ fatal=False)
+ if not page:
+ break
+ edges = try_get(
+ page, lambda x: x[0]['data']['user'][entries_key]['edges'], list)
+ if not edges:
+ break
+ for edge in edges:
+ if not isinstance(edge, dict):
+ continue
+ if edge.get('__typename') != self._EDGE_KIND:
+ continue
+ node = edge.get('node')
+ if not isinstance(node, dict):
+ continue
+ if node.get('__typename') != self._NODE_KIND:
+ continue
+ entry = self._extract_entry(node)
+ if entry:
+ cursor = edge.get('cursor')
+ yield entry
+ if not cursor or not isinstance(cursor, compat_str):
+ break
+
+
+class TwitchVideosIE(TwitchPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'
+
+ _TESTS = [{
+ # All Videos sorted by Date
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=all',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'spamfish - All Videos sorted by Date',
+ },
+ 'playlist_mincount': 924,
+ }, {
+ # All Videos sorted by Popular
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=views',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'spamfish - All Videos sorted by Popular',
+ },
+ 'playlist_mincount': 931,
+ }, {
+ # Past Broadcasts sorted by Date
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=archives',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'spamfish - Past Broadcasts sorted by Date',
+ },
+ 'playlist_mincount': 27,
+ }, {
+ # Highlights sorted by Date
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'spamfish - Highlights sorted by Date',
+ },
+ 'playlist_mincount': 901,
+ }, {
+ # Uploads sorted by Date
+ 'url': 'https://www.twitch.tv/esl_csgo/videos?filter=uploads&sort=time',
+ 'info_dict': {
+ 'id': 'esl_csgo',
+ 'title': 'esl_csgo - Uploads sorted by Date',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ # Past Premieres sorted by Date
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=past_premieres',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'spamfish - Past Premieres sorted by Date',
+ },
+ 'playlist_mincount': 1,
+ }, {
+ 'url': 'https://www.twitch.tv/spamfish/videos/all',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/spamfish/videos/all',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.twitch.tv/spamfish/videos',
+ 'only_matching': True,
+ }]
+
+ Broadcast = collections.namedtuple('Broadcast', ['type', 'label'])
+
+ _DEFAULT_BROADCAST = Broadcast(None, 'All Videos')
+ _BROADCASTS = {
+ 'archives': Broadcast('ARCHIVE', 'Past Broadcasts'),
+ 'highlights': Broadcast('HIGHLIGHT', 'Highlights'),
+ 'uploads': Broadcast('UPLOAD', 'Uploads'),
+ 'past_premieres': Broadcast('PAST_PREMIERE', 'Past Premieres'),
+ 'all': _DEFAULT_BROADCAST,
+ }
+
+ _DEFAULT_SORTED_BY = 'Date'
+ _SORTED_BY = {
+ 'time': _DEFAULT_SORTED_BY,
+ 'views': 'Popular',
+ }
+
+ _OPERATION_NAME = 'FilterableVideoTower_Videos'
+ _ENTRY_KIND = 'video'
+ _EDGE_KIND = 'VideoEdge'
+ _NODE_KIND = 'Video'
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if any(ie.suitable(url) for ie in (
+ TwitchVideosClipsIE,
+ TwitchVideosCollectionsIE))
+ else super(TwitchVideosIE, cls).suitable(url))
+
+ @staticmethod
+ def _make_variables(channel_name, broadcast_type, sort):
+ return {
+ 'channelOwnerLogin': channel_name,
+ 'broadcastType': broadcast_type,
+ 'videoSort': sort.upper(),
+ }
+
+ @staticmethod
+ def _extract_entry(node):
+ return _make_video_result(node)
+
+ def _real_extract(self, url):
+ channel_name = self._match_id(url)
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ filter = qs.get('filter', ['all'])[0]
+ sort = qs.get('sort', ['time'])[0]
+ broadcast = self._BROADCASTS.get(filter, self._DEFAULT_BROADCAST)
+ return self.playlist_result(
+ self._entries(channel_name, broadcast.type, sort),
+ playlist_id=channel_name,
+ playlist_title='%s - %s sorted by %s'
+ % (channel_name, broadcast.label,
+ self._SORTED_BY.get(sort, self._DEFAULT_SORTED_BY)))
+
+
+class TwitchVideosClipsIE(TwitchPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:clips|videos/*?\?.*?\bfilter=clips)'
+
+ _TESTS = [{
+ # Clips
+ 'url': 'https://www.twitch.tv/vanillatv/clips?filter=clips&range=all',
+ 'info_dict': {
+ 'id': 'vanillatv',
+ 'title': 'vanillatv - Clips Top All',
+ },
+ 'playlist_mincount': 1,
+ }, {
+ 'url': 'https://www.twitch.tv/dota2ruhub/videos?filter=clips&range=7d',
+ 'only_matching': True,
+ }]
+
+ Clip = collections.namedtuple('Clip', ['filter', 'label'])
+
+ _DEFAULT_CLIP = Clip('LAST_WEEK', 'Top 7D')
+ _RANGE = {
+ '24hr': Clip('LAST_DAY', 'Top 24H'),
+ '7d': _DEFAULT_CLIP,
+ '30d': Clip('LAST_MONTH', 'Top 30D'),
+ 'all': Clip('ALL_TIME', 'Top All'),
+ }
+
+ # NB: values other than 20 result in skipped videos
+ _PAGE_LIMIT = 20
+
+ _OPERATION_NAME = 'ClipsCards__User'
+ _ENTRY_KIND = 'clip'
+ _EDGE_KIND = 'ClipEdge'
+ _NODE_KIND = 'Clip'
+
+ @staticmethod
+ def _make_variables(channel_name, filter):
+ return {
+ 'login': channel_name,
+ 'criteria': {
+ 'filter': filter,
+ },
+ }
+
+ @staticmethod
+ def _extract_entry(node):
+ assert isinstance(node, dict)
+ clip_url = url_or_none(node.get('url'))
+ if not clip_url:
+ return
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': TwitchClipsIE.ie_key(),
+ 'id': node.get('id'),
+ 'url': clip_url,
+ 'title': node.get('title'),
+ 'thumbnail': node.get('thumbnailURL'),
+ 'duration': float_or_none(node.get('durationSeconds')),
+ 'timestamp': unified_timestamp(node.get('createdAt')),
+ 'view_count': int_or_none(node.get('viewCount')),
+ 'language': node.get('language'),
+ }
+
+ def _real_extract(self, url):
+ channel_name = self._match_id(url)
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ range = qs.get('range', ['7d'])[0]
+ clip = self._RANGE.get(range, self._DEFAULT_CLIP)
+ return self.playlist_result(
+ self._entries(channel_name, clip.filter),
+ playlist_id=channel_name,
+ playlist_title='%s - Clips %s' % (channel_name, clip.label))
+
+
+class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/videos/*?\?.*?\bfilter=collections'
+
+ _TESTS = [{
+ # Collections
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=collections',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'spamfish - Collections',
+ },
+ 'playlist_mincount': 3,
+ }]
+
+ _OPERATION_NAME = 'ChannelCollectionsContent'
+ _ENTRY_KIND = 'collection'
+ _EDGE_KIND = 'CollectionsItemEdge'
+ _NODE_KIND = 'Collection'
+
+ @staticmethod
+ def _make_variables(channel_name):
+ return {
+ 'ownerLogin': channel_name,
+ }
+
+ @staticmethod
+ def _extract_entry(node):
+ assert isinstance(node, dict)
+ collection_id = node.get('id')
+ if not collection_id:
+ return
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': TwitchCollectionIE.ie_key(),
+ 'id': collection_id,
+ 'url': 'https://www.twitch.tv/collections/%s' % collection_id,
+ 'title': node.get('title'),
+ 'thumbnail': node.get('thumbnailURL'),
+ 'duration': float_or_none(node.get('lengthSeconds')),
+ 'timestamp': unified_timestamp(node.get('updatedAt')),
+ 'view_count': int_or_none(node.get('viewCount')),
+ }
+
+ def _real_extract(self, url):
+ channel_name = self._match_id(url)
+ return self.playlist_result(
+ self._entries(channel_name), playlist_id=channel_name,
+ playlist_title='%s - Collections' % channel_name)
+
+
+class TwitchStreamIE(TwitchBaseIE):
+ IE_NAME = 'twitch:stream'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?:www|go|m)\.)?twitch\.tv/|
+ player\.twitch\.tv/\?.*?\bchannel=
+ )
+ (?P<id>[^/#?]+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://www.twitch.tv/shroomztv',
+ 'info_dict': {
+ 'id': '12772022048',
+ 'display_id': 'shroomztv',
+ 'ext': 'mp4',
+ 'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
+ 'is_live': True,
+ 'timestamp': 1421928037,
+ 'upload_date': '20150122',
+ 'uploader': 'ShroomzTV',
+ 'uploader_id': 'shroomztv',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.twitch.tv/miracle_doto#profile-0',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://player.twitch.tv/?channel=lotsofs',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://go.twitch.tv/food',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/food',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if any(ie.suitable(url) for ie in (
+ TwitchVodIE,
+ TwitchCollectionIE,
+ TwitchVideosIE,
+ TwitchVideosClipsIE,
+ TwitchVideosCollectionsIE,
+ TwitchClipsIE))
+ else super(TwitchStreamIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ channel_name = self._match_id(url).lower()
+
+ gql = self._download_gql(
+ channel_name, [{
+ 'operationName': 'StreamMetadata',
+ 'variables': {'channelLogin': channel_name},
+ }, {
+ 'operationName': 'ComscoreStreamingQuery',
+ 'variables': {
+ 'channel': channel_name,
+ 'clipSlug': '',
+ 'isClip': False,
+ 'isLive': True,
+ 'isVodOrCollection': False,
+ 'vodID': '',
+ },
+ }, {
+ 'operationName': 'VideoPreviewOverlay',
+ 'variables': {'login': channel_name},
+ }],
+ 'Downloading stream GraphQL')
+
+ user = gql[0]['data']['user']
+
+ if not user:
+ raise ExtractorError(
+ '%s does not exist' % channel_name, expected=True)
+
+ stream = user['stream']
+
+ if not stream:
+ raise ExtractorError('%s is offline' % channel_name, expected=True)
+
+ access_token = self._download_access_token(
+ channel_name, 'stream', 'channelName')
+ token = access_token['value']
+
+ stream_id = stream.get('id') or channel_name
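+ # Query for the Usher (HLS edge) playlist; 'p' acts as a random cache-buster, and sig/token come from the access token fetched above.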
+ query = {
+ 'allow_source': 'true',
+ 'allow_audio_only': 'true',
+ 'allow_spectre': 'true',
+ 'p': random.randint(1000000, 10000000),
+ 'player': 'twitchweb',
+ 'playlist_include_framerate': 'true',
+ 'segment_preference': '4',
+ 'sig': access_token['signature'].encode('utf-8'),
+ 'token': token.encode('utf-8'),
+ }
+ formats = self._extract_m3u8_formats(
+ '%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name),
+ stream_id, 'mp4', query=query)
+ self._prefer_source(formats)
+
+ view_count = stream.get('viewers')
+ timestamp = unified_timestamp(stream.get('createdAt'))
+
+ sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {}
+ uploader = sq_user.get('displayName')
+ description = try_get(
+ sq_user, lambda x: x['broadcastSettings']['title'], compat_str)
+
+ thumbnail = url_or_none(try_get(
+ gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'],
+ compat_str))
+
+ title = uploader or channel_name
+ stream_type = stream.get('type')
+ if stream_type in ['rerun', 'live']:
+ title += ' (%s)' % stream_type
+
+ return {
+ 'id': stream_id,
+ 'display_id': channel_name,
+ 'title': self._live_title(title),
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': channel_name,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'formats': formats,
+ 'is_live': stream_type == 'live',
+ }
+
+
+class TwitchClipsIE(TwitchBaseIE):
+ IE_NAME = 'twitch:clips'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
+ (?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/
+ )
+ (?P<id>[^/?#&]+)
+ '''
+
+ _TESTS = [{
+ 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
+ 'md5': '761769e1eafce0ffebfb4089cb3847cd',
+ 'info_dict': {
+ 'id': '42850523',
+ 'ext': 'mp4',
+ 'title': 'EA Play 2016 Live from the Novo Theatre',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1465767393,
+ 'upload_date': '20160612',
+ 'creator': 'EA',
+ 'uploader': 'stereotype_',
+ 'uploader_id': '43566419',
+ },
+ }, {
+ # multiple formats
+ 'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ clip = self._download_gql(
+ video_id, [{
+ 'operationName': 'VideoAccessToken_Clip',
+ 'variables': {
+ 'slug': video_id,
+ },
+ }],
+ 'Downloading clip access token GraphQL')[0]['data']['clip']
+
+ if not clip:
+ raise ExtractorError(
+ 'This clip is no longer available', expected=True)
+
+ access_query = {
+ 'sig': clip['playbackAccessToken']['signature'],
+ 'token': clip['playbackAccessToken']['value'],
+ }
+
+ data = self._download_base_gql(
+ video_id, {
+ 'query': '''{
+ clip(slug: "%s") {
+ broadcaster {
+ displayName
+ }
+ createdAt
+ curator {
+ displayName
+ id
+ }
+ durationSeconds
+ id
+ tiny: thumbnailURL(width: 86, height: 45)
+ small: thumbnailURL(width: 260, height: 147)
+ medium: thumbnailURL(width: 480, height: 272)
+ title
+ videoQualities {
+ frameRate
+ quality
+ sourceURL
+ }
+ viewCount
+ }
+}''' % video_id}, 'Downloading clip GraphQL', fatal=False)
+
+ if data:
+ clip = try_get(data, lambda x: x['data']['clip'], dict) or clip
+
+ formats = []
+ for option in clip.get('videoQualities', []):
+ if not isinstance(option, dict):
+ continue
+ source = url_or_none(option.get('sourceURL'))
+ if not source:
+ continue
+ formats.append({
+ 'url': update_url_query(source, access_query),
+ 'format_id': option.get('quality'),
+ 'height': int_or_none(option.get('quality')),
+ 'fps': int_or_none(option.get('frameRate')),
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for thumbnail_id in ('tiny', 'small', 'medium'):
+ thumbnail_url = clip.get(thumbnail_id)
+ if not thumbnail_url:
+ continue
+ thumb = {
+ 'id': thumbnail_id,
+ 'url': thumbnail_url,
+ }
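+ # Thumbnail URLs encode their dimensions as '-<width>x<height>.' just before the file extension.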
+ mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
+ if mobj:
+ thumb.update({
+ 'height': int(mobj.group(2)),
+ 'width': int(mobj.group(1)),
+ })
+ thumbnails.append(thumb)
+
+ return {
+ 'id': clip.get('id') or video_id,
+ 'title': clip.get('title') or video_id,
+ 'formats': formats,
+ 'duration': int_or_none(clip.get('durationSeconds')),
+ 'view_count': int_or_none(clip.get('viewCount')),
+ 'timestamp': unified_timestamp(clip.get('createdAt')),
+ 'thumbnails': thumbnails,
+ 'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
+ 'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
+ 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
+ }
diff --git a/hypervideo_dl/extractor/twitter.py b/hypervideo_dl/extractor/twitter.py
new file mode 100644
index 0000000..cfa7a73
--- /dev/null
+++ b/hypervideo_dl/extractor/twitter.py
@@ -0,0 +1,669 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_parse_qs,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ try_get,
+ strip_or_none,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ xpath_text,
+)
+
+from .periscope import (
+ PeriscopeBaseIE,
+ PeriscopeIE,
+)
+
+
+class TwitterBaseIE(InfoExtractor):
+ _API_BASE = 'https://api.twitter.com/1.1/'
+ _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?twitter\.com/'
+ _GUEST_TOKEN = None
+
+ def _extract_variant_formats(self, variant, video_id):
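+ # A variant is either an HLS master playlist (.m3u8) or a single progressive MP4 with an optional bitrate.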
+ variant_url = variant.get('url')
+ if not variant_url:
+ return []
+ elif '.m3u8' in variant_url:
+ return self._extract_m3u8_formats(
+ variant_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ else:
+ tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
+ f = {
+ 'url': variant_url,
+ 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
+ 'tbr': tbr,
+ }
+ self._search_dimensions_in_video_url(f, variant_url)
+ return [f]
+
+ def _extract_formats_from_vmap_url(self, vmap_url, video_id):
+ vmap_url = url_or_none(vmap_url)
+ if not vmap_url:
+ return []
+ vmap_data = self._download_xml(vmap_url, video_id)
+ formats = []
+ urls = []
+ for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
+ video_variant.attrib['url'] = compat_urllib_parse_unquote(
+ video_variant.attrib['url'])
+ urls.append(video_variant.attrib['url'])
+ formats.extend(self._extract_variant_formats(
+ video_variant.attrib, video_id))
+ video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
+ if video_url not in urls:
+ formats.extend(self._extract_variant_formats({'url': video_url}, video_id))
+ return formats
+
+ @staticmethod
+ def _search_dimensions_in_video_url(a_format, video_url):
+ m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
+ if m:
+ a_format.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+
+ def _call_api(self, path, video_id, query={}):
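+ # The hard-coded bearer token is Twitter's public web-client credential; a guest token is fetched once and cached on the class.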
+ headers = {
+ 'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw',
+ }
+ if not self._GUEST_TOKEN:
+ self._GUEST_TOKEN = self._download_json(
+ self._API_BASE + 'guest/activate.json', video_id,
+ 'Downloading guest token', data=b'',
+ headers=headers)['guest_token']
+ headers['x-guest-token'] = self._GUEST_TOKEN
+ try:
+ return self._download_json(
+ self._API_BASE + path, video_id, headers=headers, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ raise ExtractorError(self._parse_json(
+ e.cause.read().decode(),
+ video_id)['errors'][0]['message'], expected=True)
+ raise
+
+
+class TwitterCardIE(InfoExtractor):
+ IE_NAME = 'twitter:card'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
+ # MD5 checksums are different in different places
+ 'info_dict': {
+ 'id': '560070183650213889',
+ 'ext': 'mp4',
+ 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
+ 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
+ 'uploader': 'Twitter',
+ 'uploader_id': 'Twitter',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 30.033,
+ 'timestamp': 1422366112,
+ 'upload_date': '20150127',
+ },
+ },
+ {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
+ 'md5': '7137eca597f72b9abbe61e5ae0161399',
+ 'info_dict': {
+ 'id': '623160978427936768',
+ 'ext': 'mp4',
+ 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
+ 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
+ 'uploader': 'NASA',
+ 'uploader_id': 'NASA',
+ 'timestamp': 1437408129,
+ 'upload_date': '20150720',
+ },
+ },
+ {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
+ 'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
+ 'info_dict': {
+ 'id': 'dq4Oj5quskI',
+ 'ext': 'mp4',
+ 'title': 'Ubuntu 11.10 Overview',
+ 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
+ 'upload_date': '20111013',
+ 'uploader': 'OMG! UBUNTU!',
+ 'uploader_id': 'omgubuntu',
+ },
+ 'add_ie': ['Youtube'],
+ },
+ {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
+ 'md5': '6dabeaca9e68cbb71c99c322a4b42a11',
+ 'info_dict': {
+ 'id': 'iBb2x00UVlv',
+ 'ext': 'mp4',
+ 'upload_date': '20151113',
+ 'uploader_id': '1189339351084113920',
+ 'uploader': 'ArsenalTerje',
+ 'title': 'Vine by ArsenalTerje',
+ 'timestamp': 1447451307,
+ },
+ 'add_ie': ['Vine'],
+ }, {
+ 'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
+ 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
+ 'info_dict': {
+ 'id': '705235433198714880',
+ 'ext': 'mp4',
+ 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
+ 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
+ 'uploader': 'Brent Yarina',
+ 'uploader_id': 'BTNBrentYarina',
+ 'timestamp': 1456976204,
+ 'upload_date': '20160303',
+ },
+ 'skip': 'This content is no longer available.',
+ }, {
+ 'url': 'https://twitter.com/i/videos/752274308186120192',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ status_id = self._match_id(url)
+ return self.url_result(
+ 'https://twitter.com/statuses/' + status_id,
+ TwitterIE.ie_key(), status_id)
+
+
+class TwitterIE(TwitterBaseIE):
+ IE_NAME = 'twitter'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
+ 'info_dict': {
+ 'id': '643211948184596480',
+ 'ext': 'mp4',
+ 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
+ 'uploader': 'FREE THE NIPPLE',
+ 'uploader_id': 'freethenipple',
+ 'duration': 12.922,
+ 'timestamp': 1442188653,
+ 'upload_date': '20150913',
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
+ 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
+ 'info_dict': {
+ 'id': '657991469417025536',
+ 'ext': 'mp4',
+ 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
+ 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
+ 'thumbnail': r're:^https?://.*\.png',
+ 'uploader': 'Gifs',
+ 'uploader_id': 'giphz',
+ },
+ 'expected_warnings': ['height', 'width'],
+ 'skip': 'Account suspended',
+ }, {
+ 'url': 'https://twitter.com/starwars/status/665052190608723968',
+ 'info_dict': {
+ 'id': '665052190608723968',
+ 'ext': 'mp4',
+ 'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.',
+ 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
+ 'uploader_id': 'starwars',
+ 'uploader': 'Star Wars',
+ 'timestamp': 1447395772,
+ 'upload_date': '20151113',
+ },
+ }, {
+ 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
+ 'info_dict': {
+ 'id': '705235433198714880',
+ 'ext': 'mp4',
+ 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
+ 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
+ 'uploader_id': 'BTNBrentYarina',
+ 'uploader': 'Brent Yarina',
+ 'timestamp': 1456976204,
+ 'upload_date': '20160303',
+ },
+ 'params': {
+ # The same video as https://twitter.com/i/videos/tweet/705235433198714880
+ # Test case of TwitterCardIE
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
+ 'info_dict': {
+ 'id': '700207533655363584',
+ 'ext': 'mp4',
+ 'title': 'simon vertugo - BEAT PROD: @suhmeduh #Damndaniel',
+ 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'uploader': 'simon vertugo',
+ 'uploader_id': 'simonvertugo',
+ 'duration': 30.0,
+ 'timestamp': 1455777459,
+ 'upload_date': '20160218',
+ },
+ }, {
+ 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
+ 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
+ 'info_dict': {
+ 'id': 'MIOxnrUteUd',
+ 'ext': 'mp4',
+ 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
+ 'uploader': 'TAKUMA',
+ 'uploader_id': '1004126642786242560',
+ 'timestamp': 1402826626,
+ 'upload_date': '20140615',
+ },
+ 'add_ie': ['Vine'],
+ }, {
+ 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
+ 'info_dict': {
+ 'id': '719944021058060289',
+ 'ext': 'mp4',
+ 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
+ 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
+ 'uploader_id': 'CaptainAmerica',
+ 'uploader': 'Captain America',
+ 'duration': 3.17,
+ 'timestamp': 1460483005,
+ 'upload_date': '20160412',
+ },
+ }, {
+ 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
+ 'info_dict': {
+ 'id': '1zqKVVlkqLaKB',
+ 'ext': 'mp4',
+ 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
+ 'upload_date': '20160923',
+ 'uploader_id': '1PmKqpJdOJQoY',
+ 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
+ 'timestamp': 1474613214,
+ },
+ 'add_ie': ['Periscope'],
+ }, {
+ # has mp4 formats via mobile API
+ 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
+ 'info_dict': {
+ 'id': '852138619213144067',
+ 'ext': 'mp4',
+ 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
+ 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
+ 'uploader': 'عالم الأخبار',
+ 'uploader_id': 'news_al3alm',
+ 'duration': 277.4,
+ 'timestamp': 1492000653,
+ 'upload_date': '20170412',
+ },
+ 'skip': 'Account suspended',
+ }, {
+ 'url': 'https://twitter.com/i/web/status/910031516746514432',
+ 'info_dict': {
+ 'id': '910031516746514432',
+ 'ext': 'mp4',
+ 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
+ 'uploader': 'Préfet de Guadeloupe',
+ 'uploader_id': 'Prefet971',
+ 'duration': 47.48,
+ 'timestamp': 1505803395,
+ 'upload_date': '20170919',
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
+ }, {
+ # card via api.twitter.com/1.1/videos/tweet/config
+ 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
+ 'info_dict': {
+ 'id': '1001551623938805763',
+ 'ext': 'mp4',
+ 'title': 're:.*?Shep is on a roll today.*?',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
+ 'uploader': 'Lis Power',
+ 'uploader_id': 'LisPower1',
+ 'duration': 111.278,
+ 'timestamp': 1527623489,
+ 'upload_date': '20180529',
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
+ }, {
+ 'url': 'https://twitter.com/foobar/status/1087791357756956680',
+ 'info_dict': {
+ 'id': '1087791357756956680',
+ 'ext': 'mp4',
+ 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
+ 'uploader': 'Twitter',
+ 'uploader_id': 'Twitter',
+ 'duration': 61.567,
+ 'timestamp': 1548184644,
+ 'upload_date': '20190122',
+ },
+ }, {
+ # not available in Periscope
+ 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
+ 'info_dict': {
+ 'id': '1vOGwqejwoWxB',
+ 'ext': 'mp4',
+ 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
+ 'uploader': 'Vivi',
+ 'uploader_id': '1eVjYOLGkGrQL',
+ },
+ 'add_ie': ['TwitterBroadcast'],
+ }, {
+ # unified card
+ 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
+ 'info_dict': {
+ 'id': '1349794411333394432',
+ 'ext': 'mp4',
+ 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
+ 'uploader': 'Brooklyn Nets',
+ 'uploader_id': 'BrooklynNets',
+ 'duration': 324.484,
+ 'timestamp': 1610651040,
+ 'upload_date': '20210114',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # Twitch Clip Embed
+ 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
+ 'only_matching': True,
+ }, {
+ # promo_video_website card
+ 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
+ 'only_matching': True,
+ }, {
+ # promo_video_convo card
+ 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
+ 'only_matching': True,
+ }, {
+ # appplayer card
+ 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
+ 'only_matching': True,
+ }, {
+ # video_direct_message card
+ 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
+ 'only_matching': True,
+ }, {
+ # poll2choice_video card
+ 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
+ 'only_matching': True,
+ }, {
+ # poll3choice_video card
+ 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
+ 'only_matching': True,
+ }, {
+ # poll4choice_video card
+ 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ twid = self._match_id(url)
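+ # tweet_mode=extended makes the API return the untruncated full_text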
+ status = self._call_api(
+ 'statuses/show/%s.json' % twid, twid, {
+ 'cards_platform': 'Web-12',
+ 'include_cards': 1,
+ 'include_reply_count': 1,
+ 'include_user_entities': 0,
+ 'tweet_mode': 'extended',
+ })
+
+ title = description = status['full_text'].replace('\n', ' ')
+ # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
+ title = re.sub(r'\s+(https?://[^ ]+)', '', title)
+ user = status.get('user') or {}
+ uploader = user.get('name')
+ if uploader:
+ title = '%s - %s' % (uploader, title)
+ uploader_id = user.get('screen_name')
+
+ tags = []
+ for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []):
+ hashtag_text = hashtag.get('text')
+ if not hashtag_text:
+ continue
+ tags.append(hashtag_text)
+
+ info = {
+ 'id': twid,
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'timestamp': unified_timestamp(status.get('created_at')),
+ 'uploader_id': uploader_id,
+ 'uploader_url': 'https://twitter.com/' + uploader_id if uploader_id else None,
+ 'like_count': int_or_none(status.get('favorite_count')),
+ 'repost_count': int_or_none(status.get('retweet_count')),
+ 'comment_count': int_or_none(status.get('reply_count')),
+ 'age_limit': 18 if status.get('possibly_sensitive') else 0,
+ 'tags': tags,
+ }
+
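+ # Native Twitter videos expose their variants (HLS playlists and MP4s)
+ # under extended_entities.media[].video_info; thumbnails are derived
+ # from media_url by appending a ?name=<size> query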
+ def extract_from_video_info(media):
+ video_info = media.get('video_info') or {}
+
+ formats = []
+ for variant in video_info.get('variants', []):
+ formats.extend(self._extract_variant_formats(variant, twid))
+ self._sort_formats(formats)
+
+ thumbnails = []
+ media_url = media.get('media_url_https') or media.get('media_url')
+ if media_url:
+ def add_thumbnail(name, size):
+ thumbnails.append({
+ 'id': name,
+ 'url': update_url_query(media_url, {'name': name}),
+ 'width': int_or_none(size.get('w') or size.get('width')),
+ 'height': int_or_none(size.get('h') or size.get('height')),
+ })
+ for name, size in media.get('sizes', {}).items():
+ add_thumbnail(name, size)
+ add_thumbnail('orig', media.get('original_info') or {})
+
+ info.update({
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(video_info.get('duration_millis'), 1000),
+ })
+
+ media = try_get(status, lambda x: x['extended_entities']['media'][0])
+ if media and media.get('type') != 'photo':
+ extract_from_video_info(media)
+ else:
+ card = status.get('card')
+ if card:
+ binding_values = card['binding_values']
+
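+ # Each binding value is a dict with a 'type' field and the payload
+ # stored under '<type>_value' (e.g. {'type': 'STRING', 'string_value': ...})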
+ def get_binding_value(k):
+ o = binding_values.get(k) or {}
+ return try_get(o, lambda x: x[x['type'].lower() + '_value'])
+
+ card_name = card['name'].split(':')[-1]
+ if card_name == 'player':
+ info.update({
+ '_type': 'url',
+ 'url': get_binding_value('player_url'),
+ })
+ elif card_name == 'periscope_broadcast':
+ info.update({
+ '_type': 'url',
+ 'url': get_binding_value('url') or get_binding_value('player_url'),
+ 'ie_key': PeriscopeIE.ie_key(),
+ })
+ elif card_name == 'broadcast':
+ info.update({
+ '_type': 'url',
+ 'url': get_binding_value('broadcast_url'),
+ 'ie_key': TwitterBroadcastIE.ie_key(),
+ })
+ elif card_name == 'summary':
+ info.update({
+ '_type': 'url',
+ 'url': get_binding_value('card_url'),
+ })
+ elif card_name == 'unified_card':
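+ # The unified_card binding value is itself a JSON string whose
+ # media_entities map media IDs to the same video_info structure as above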
+ media_entities = self._parse_json(get_binding_value('unified_card'), twid)['media_entities']
+ extract_from_video_info(next(iter(media_entities.values())))
+ # amplify, promo_video_website, promo_video_convo, appplayer,
+ # video_direct_message, poll2choice_video, poll3choice_video,
+ # poll4choice_video, ...
+ else:
+ is_amplify = card_name == 'amplify'
+ vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
+ content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
+ formats = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for suffix in ('_small', '', '_large', '_x_large', '_original'):
+ image = get_binding_value('player_image' + suffix) or {}
+ image_url = image.get('url')
+ if not image_url or '/player-placeholder' in image_url:
+ continue
+ thumbnails.append({
+ 'id': suffix[1:] if suffix else 'medium',
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ info.update({
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(get_binding_value(
+ 'content_duration_seconds')),
+ })
+ else:
+ expanded_url = try_get(status, lambda x: x['entities']['urls'][0]['expanded_url'])
+ if not expanded_url:
+ raise ExtractorError("There's no video in this tweet.")
+ info.update({
+ '_type': 'url',
+ 'url': expanded_url,
+ })
+ return info
+
+
+class TwitterAmplifyIE(TwitterBaseIE):
+ IE_NAME = 'twitter:amplify'
+ _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'
+
+ _TEST = {
+ 'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
+ 'md5': '7df102d0b9fd7066b86f3159f8e81bf6',
+ 'info_dict': {
+ 'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
+ 'ext': 'mp4',
+ 'title': 'Twitter Video',
+ 'thumbnail': 're:^https?://.*',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ vmap_url = self._html_search_meta(
+ 'twitter:amplify:vmap', webpage, 'vmap url')
+ formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
+
+ thumbnails = []
+ thumbnail = self._html_search_meta(
+ 'twitter:image:src', webpage, 'thumbnail', fatal=False)
+
+ def _find_dimension(target):
+ w = int_or_none(self._html_search_meta(
+ 'twitter:%s:width' % target, webpage, fatal=False))
+ h = int_or_none(self._html_search_meta(
+ 'twitter:%s:height' % target, webpage, fatal=False))
+ return w, h
+
+ if thumbnail:
+ thumbnail_w, thumbnail_h = _find_dimension('image')
+ thumbnails.append({
+ 'url': thumbnail,
+ 'width': thumbnail_w,
+ 'height': thumbnail_h,
+ })
+
+ video_w, video_h = _find_dimension('player')
+ formats[0].update({
+ 'width': video_w,
+ 'height': video_h,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': 'Twitter Video',
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
+
+
+class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
+ IE_NAME = 'twitter:broadcast'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
+
+ _TEST = {
+ # untitled Periscope video
+ 'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
+ 'info_dict': {
+ 'id': '1yNGaQLWpejGj',
+ 'ext': 'mp4',
+ 'title': 'Andrea May Sahouri - Periscope Broadcast',
+ 'uploader': 'Andrea May Sahouri',
+ 'uploader_id': '1PXEdBZWpGwKe',
+ },
+ }
+
+ def _real_extract(self, url):
+ broadcast_id = self._match_id(url)
+ broadcast = self._call_api(
+ 'broadcasts/show.json', broadcast_id,
+ {'ids': broadcast_id})['broadcasts'][broadcast_id]
+ info = self._parse_broadcast_data(broadcast, broadcast_id)
+ media_key = broadcast['media_key']
+ source = self._call_api(
+ 'live_video_stream/status/' + media_key, media_key)['source']
+ m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
+ if '/live_video_stream/geoblocked/' in m3u8_url:
+ self.raise_geo_restricted()
+ m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse(
+ m3u8_url).query).get('type', [None])[0]
+ state, width, height = self._extract_common_format_info(broadcast)
+ info['formats'] = self._extract_pscp_m3u8_formats(
+ m3u8_url, broadcast_id, m3u8_id, state, width, height)
+ return info
diff --git a/hypervideo_dl/extractor/udemy.py b/hypervideo_dl/extractor/udemy.py
new file mode 100644
index 0000000..bc5059b
--- /dev/null
+++ b/hypervideo_dl/extractor/udemy.py
@@ -0,0 +1,481 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_kwargs,
+ compat_str,
+ compat_urllib_request,
+ compat_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ js_to_json,
+ sanitized_Request,
+ try_get,
+ unescapeHTML,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class UdemyIE(InfoExtractor):
+ IE_NAME = 'udemy'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:[^/]+\.)?udemy\.com/
+ (?:
+ [^#]+\#/lecture/|
+ lecture/view/?\?lectureId=|
+ [^/]+/learn/v4/t/lecture/
+ )
+ (?P<id>\d+)
+ '''
+ _LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1'
+ _ORIGIN_URL = 'https://www.udemy.com'
+ _NETRC_MACHINE = 'udemy'
+
+ _TESTS = [{
+ 'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757',
+ 'md5': '98eda5b657e752cf945d8445e261b5c5',
+ 'info_dict': {
+ 'id': '160614',
+ 'ext': 'mp4',
+ 'title': 'Introduction and Installation',
+ 'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876',
+ 'duration': 579.29,
+ },
+ 'skip': 'Requires udemy account credentials',
+ }, {
+ # new URL schema
+ 'url': 'https://www.udemy.com/electric-bass-right-from-the-start/learn/v4/t/lecture/4580906',
+ 'only_matching': True,
+ }, {
+ # no url in outputs format entry
+ 'url': 'https://www.udemy.com/learn-web-development-complete-step-by-step-guide-to-success/learn/v4/t/lecture/4125812',
+ 'only_matching': True,
+ }, {
+ # only outputs rendition
+ 'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://wipro.udemy.com/java-tutorial/#/lecture/172757',
+ 'only_matching': True,
+ }]
+
+ def _extract_course_info(self, webpage, video_id):
+ course = self._parse_json(
+ unescapeHTML(self._search_regex(
+ r'ng-init=["\'].*\bcourse=({.+?})[;"\']',
+ webpage, 'course', default='{}')),
+ video_id, fatal=False) or {}
+ course_id = course.get('id') or self._search_regex(
+ [
+ r'data-course-id=["\'](\d+)',
+ r'&quot;courseId&quot;\s*:\s*(\d+)'
+ ], webpage, 'course id')
+ return course_id, course.get('title')
+
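+ # Lectures are only served to enrolled accounts: free courses can be
+ # auto-enrolled below, while paid ones abort with the checkout URL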
+ def _enroll_course(self, base_url, webpage, course_id):
+ def combine_url(base_url, url):
+ return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
+
+ checkout_url = unescapeHTML(self._search_regex(
+ r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/(?:payment|cart)/checkout/.+?)\1',
+ webpage, 'checkout url', group='url', default=None))
+ if checkout_url:
+ raise ExtractorError(
+ 'Course %s is not free. You have to pay for it before you can download. '
+ 'Use this URL to confirm purchase: %s'
+ % (course_id, combine_url(base_url, checkout_url)),
+ expected=True)
+
+ enroll_url = unescapeHTML(self._search_regex(
+ r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/course/subscribe/.+?)\1',
+ webpage, 'enroll url', group='url', default=None))
+ if enroll_url:
+ webpage = self._download_webpage(
+ combine_url(base_url, enroll_url),
+ course_id, 'Enrolling in the course',
+ headers={'Referer': base_url})
+ if '>You have enrolled in' in webpage:
+ self.to_screen('%s: Successfully enrolled in the course' % course_id)
+
+ def _download_lecture(self, course_id, lecture_id):
+ return self._download_json(
+ 'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?'
+ % (course_id, lecture_id),
+ lecture_id, 'Downloading lecture JSON', query={
+ 'fields[lecture]': 'title,description,view_html,asset',
+ 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data',
+ })
+
+ def _handle_error(self, response):
+ if not isinstance(response, dict):
+ return
+ error = response.get('error')
+ if error:
+ error_str = 'Udemy returned error #%s: %s' % (error.get('code'), error.get('message'))
+ error_data = error.get('data')
+ if error_data:
+ error_str += ' - %s' % error_data.get('formErrors')
+ raise ExtractorError(error_str, expected=True)
+
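+ # Udemy sits behind bot protection: spoof a desktop browser User-Agent
+ # and turn CAPTCHA interstitials (e.g. the PerimeterX _pxCaptcha marker)
+ # into a readable error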
+ def _download_webpage_handle(self, *args, **kwargs):
+ headers = kwargs.get('headers', {}).copy()
+ headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
+ kwargs['headers'] = headers
+ ret = super(UdemyIE, self)._download_webpage_handle(
+ *args, **compat_kwargs(kwargs))
+ if not ret:
+ return ret
+ webpage, _ = ret
+ if any(p in webpage for p in (
+ '>Please verify you are a human',
+ 'Access to this page has been denied because we believe you are using automation tools to browse the website',
+ '"_pxCaptcha"')):
+ raise ExtractorError(
+ 'Udemy asks you to solve a CAPTCHA. Login with browser, '
+ 'solve CAPTCHA, then export cookies and pass cookie file to '
+ 'hypervideo with --cookies.', expected=True)
+ return ret
+
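+ # API calls are authenticated by mirroring the client_id/access_token
+ # cookies into the X-Udemy-* request headers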
+ def _download_json(self, url_or_request, *args, **kwargs):
+ headers = {
+ 'X-Udemy-Snail-Case': 'true',
+ 'X-Requested-With': 'XMLHttpRequest',
+ }
+ for cookie in self._downloader.cookiejar:
+ if cookie.name == 'client_id':
+ headers['X-Udemy-Client-Id'] = cookie.value
+ elif cookie.name == 'access_token':
+ headers['X-Udemy-Bearer-Token'] = cookie.value
+ headers['X-Udemy-Authorization'] = 'Bearer %s' % cookie.value
+
+ if isinstance(url_or_request, compat_urllib_request.Request):
+ for header, value in headers.items():
+ url_or_request.add_header(header, value)
+ else:
+ url_or_request = sanitized_Request(url_or_request, headers=headers)
+
+ response = super(UdemyIE, self)._download_json(url_or_request, *args, **kwargs)
+ self._handle_error(response)
+ return response
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_popup = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login popup')
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'href=["\'](?:https://www\.udemy\.com)?/user/logout/',
+ r'>Logout<'))
+
+ # already logged in
+ if is_logged(login_popup):
+ return
+
+ login_form = self._form_hidden_inputs('login-form', login_popup)
+
+ login_form.update({
+ 'email': username,
+ 'password': password,
+ })
+
+ response = self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form),
+ headers={
+ 'Referer': self._ORIGIN_URL,
+ 'Origin': self._ORIGIN_URL,
+ })
+
+ if not is_logged(response):
+ error = self._html_search_regex(
+ r'(?s)<div[^>]+class="form-errors[^"]*">(.+?)</div>',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+ def _real_extract(self, url):
+ lecture_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, lecture_id)
+
+ course_id, _ = self._extract_course_info(webpage, lecture_id)
+
+ try:
+ lecture = self._download_lecture(course_id, lecture_id)
+ except ExtractorError as e:
+ # Error could possibly mean we are not enrolled in the course
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ self._enroll_course(url, webpage, course_id)
+ lecture = self._download_lecture(course_id, lecture_id)
+ else:
+ raise
+
+ title = lecture['title']
+ description = lecture.get('description')
+
+ asset = lecture['asset']
+
+ asset_type = asset.get('asset_type') or asset.get('assetType')
+ if asset_type != 'Video':
+ raise ExtractorError(
+ 'Lecture %s is not a video' % lecture_id, expected=True)
+
+ stream_url = asset.get('stream_url') or asset.get('streamUrl')
+ if stream_url:
+ youtube_url = self._search_regex(
+ r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url, 'youtube URL', default=None)
+ if youtube_url:
+ return self.url_result(youtube_url, 'Youtube')
+
+ video_id = compat_str(asset['id'])
+ thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl')
+ duration = float_or_none(asset.get('data', {}).get('duration'))
+
+ subtitles = {}
+ automatic_captions = {}
+
+ formats = []
+
+ def extract_output_format(src, f_id):
+ return {
+ 'url': src.get('url'),
+ 'format_id': '%sp' % (src.get('height') or f_id),
+ 'width': int_or_none(src.get('width')),
+ 'height': int_or_none(src.get('height')),
+ 'vbr': int_or_none(src.get('video_bitrate_in_kbps')),
+ 'vcodec': src.get('video_codec'),
+ 'fps': int_or_none(src.get('frame_rate')),
+ 'abr': int_or_none(src.get('audio_bitrate_in_kbps')),
+ 'acodec': src.get('audio_codec'),
+ 'asr': int_or_none(src.get('audio_sample_rate')),
+ 'tbr': int_or_none(src.get('total_bitrate_in_kbps')),
+ 'filesize': int_or_none(src.get('file_size_in_bytes')),
+ }
+
+ outputs = asset.get('data', {}).get('outputs')
+ if not isinstance(outputs, dict):
+ outputs = {}
+
+ def add_output_format_meta(f, key):
+ output = outputs.get(key)
+ if isinstance(output, dict):
+ output_format = extract_output_format(output, key)
+ output_format.update(f)
+ return output_format
+ return f
+
+ def extract_formats(source_list):
+ if not isinstance(source_list, list):
+ return
+ for source in source_list:
+ video_url = url_or_none(source.get('file') or source.get('src'))
+ if not video_url:
+ continue
+ if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+ format_id = source.get('label')
+ f = {
+ 'url': video_url,
+ 'format_id': '%sp' % format_id,
+ 'height': int_or_none(format_id),
+ }
+ if format_id:
+ # Some videos contain additional metadata (e.g.
+ # https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
+ f = add_output_format_meta(f, format_id)
+ formats.append(f)
+
+ def extract_subtitles(track_list):
+ if not isinstance(track_list, list):
+ return
+ for track in track_list:
+ if not isinstance(track, dict):
+ continue
+ if track.get('kind') != 'captions':
+ continue
+ src = url_or_none(track.get('src'))
+ if not src:
+ continue
+ lang = track.get('language') or track.get(
+ 'srclang') or track.get('label')
+ sub_dict = automatic_captions if track.get(
+ 'autogenerated') is True else subtitles
+ sub_dict.setdefault(lang, []).append({
+ 'url': src,
+ })
+
+ for url_kind in ('download', 'stream'):
+ urls = asset.get('%s_urls' % url_kind)
+ if isinstance(urls, dict):
+ extract_formats(urls.get('Video'))
+
+ captions = asset.get('captions')
+ if isinstance(captions, list):
+ for cc in captions:
+ if not isinstance(cc, dict):
+ continue
+ cc_url = url_or_none(cc.get('url'))
+ if not cc_url:
+ continue
+ lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
+ sub_dict = (automatic_captions if cc.get('source') == 'auto'
+ else subtitles)
+ sub_dict.setdefault(lang or 'en', []).append({
+ 'url': cc_url,
+ })
+
+ view_html = lecture.get('view_html')
+ if view_html:
+ view_html_urls = set()
+ for source in re.findall(r'<source[^>]+>', view_html):
+ attributes = extract_attributes(source)
+ src = attributes.get('src')
+ if not src:
+ continue
+ res = attributes.get('data-res')
+ height = int_or_none(res)
+ if src in view_html_urls:
+ continue
+ view_html_urls.add(src)
+ if attributes.get('type') == 'application/x-mpegURL' or determine_ext(src) == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
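+ # The manifest itself lacks height/tbr, but both can be recovered
+ # from the /hls_<height>_<tbr>/ path component of each format URL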
+ for f in m3u8_formats:
+ m = re.search(r'/hls_(?P<height>\d{3,4})_(?P<tbr>\d{2,})/', f['url'])
+ if m:
+ if not f.get('height'):
+ f['height'] = int(m.group('height'))
+ if not f.get('tbr'):
+ f['tbr'] = int(m.group('tbr'))
+ formats.extend(m3u8_formats)
+ else:
+ formats.append(add_output_format_meta({
+ 'url': src,
+ 'format_id': '%dp' % height if height else None,
+ 'height': height,
+ }, res))
+
+ # react rendition since 2017.04.15 (see
+ # https://github.com/ytdl-org/youtube-dl/issues/12744)
+ data = self._parse_json(
+ self._search_regex(
+ r'videojs-setup-data=(["\'])(?P<data>{.+?})\1', view_html,
+ 'setup data', default='{}', group='data'), video_id,
+ transform_source=unescapeHTML, fatal=False)
+ if data and isinstance(data, dict):
+ extract_formats(data.get('sources'))
+ if not duration:
+ duration = int_or_none(data.get('duration'))
+ extract_subtitles(data.get('tracks'))
+
+ if not subtitles and not automatic_captions:
+ text_tracks = self._parse_json(
+ self._search_regex(
+ r'text-tracks=(["\'])(?P<data>\[.+?\])\1', view_html,
+ 'text tracks', default='{}', group='data'), video_id,
+ transform_source=lambda s: js_to_json(unescapeHTML(s)),
+ fatal=False)
+ extract_subtitles(text_tracks)
+
+ if not formats and outputs:
+ for format_id, output in outputs.items():
+ f = extract_output_format(output, format_id)
+ if f.get('url'):
+ formats.append(f)
+
+ self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'automatic_captions': automatic_captions,
+ }
+
+
+class UdemyCourseIE(UdemyIE):
+ IE_NAME = 'udemy:course'
+ _VALID_URL = r'https?://(?:[^/]+\.)?udemy\.com/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.udemy.com/java-tutorial/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://wipro.udemy.com/java-tutorial/',
+ 'only_matching': True,
+ }]
+
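+ # The course regex would also match lecture URLs, so defer to UdemyIE
+ # whenever it claims the URL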
+ @classmethod
+ def suitable(cls, url):
+ return False if UdemyIE.suitable(url) else super(UdemyCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ course_path = self._match_id(url)
+
+ webpage = self._download_webpage(url, course_path)
+
+ course_id, title = self._extract_course_info(webpage, course_path)
+
+ self._enroll_course(url, webpage, course_id)
+
+ response = self._download_json(
+ 'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id,
+ course_id, 'Downloading course curriculum', query={
+ 'fields[chapter]': 'title,object_index',
+ 'fields[lecture]': 'title,asset',
+ 'page_size': '1000',
+ })
+
+ entries = []
+ chapter, chapter_number = [None] * 2
+ for entry in response['results']:
+ clazz = entry.get('_class')
+ if clazz == 'lecture':
+ asset = entry.get('asset')
+ if isinstance(asset, dict):
+ asset_type = asset.get('asset_type') or asset.get('assetType')
+ if asset_type != 'Video':
+ continue
+ lecture_id = entry.get('id')
+ if lecture_id:
+ entry = {
+ '_type': 'url_transparent',
+ 'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']),
+ 'title': entry.get('title'),
+ 'ie_key': UdemyIE.ie_key(),
+ }
+ if chapter_number:
+ entry['chapter_number'] = chapter_number
+ if chapter:
+ entry['chapter'] = chapter
+ entries.append(entry)
+ elif clazz == 'chapter':
+ chapter_number = entry.get('object_index')
+ chapter = entry.get('title')
+
+ return self.playlist_result(entries, course_id, title)
diff --git a/hypervideo_dl/extractor/udn.py b/hypervideo_dl/extractor/udn.py
new file mode 100644
index 0000000..2c8e5c7
--- /dev/null
+++ b/hypervideo_dl/extractor/udn.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ js_to_json,
+)
+from ..compat import compat_urlparse
+
+
+class UDNEmbedIE(InfoExtractor):
+ IE_DESC = '聯合影音'
+ _PROTOCOL_RELATIVE_VALID_URL = r'//video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
+ _VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL
+ _TESTS = [{
+ 'url': 'http://video.udn.com/embed/news/300040',
+ 'info_dict': {
+ 'id': '300040',
+ 'ext': 'mp4',
+ 'title': '生物老師男變女 全校挺"做自己"',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to parse JSON Expecting value'],
+ }, {
+ 'url': 'https://video.udn.com/embed/news/300040',
+ 'only_matching': True,
+ }, {
+ # From https://video.udn.com/news/303776
+ 'url': 'https://video.udn.com/play/news/303776',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ page = self._download_webpage(url, video_id)
+
+ options_str = self._html_search_regex(
+ r'var\s+options\s*=\s*([^;]+);', page, 'options')
+ trans_options_str = js_to_json(options_str)
+ options = self._parse_json(trans_options_str, 'options', fatal=False) or {}
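+ # The options object is not always valid JSON even after js_to_json
+ # (see the expected 'Failed to parse JSON' warning in the test above),
+ # so fall back to pulling the individual fields out with regexes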
+ if options:
+ video_urls = options['video']
+ title = options['title']
+ poster = options.get('poster')
+ else:
+ video_urls = self._parse_json(self._html_search_regex(
+ r'"video"\s*:\s*({.+?})\s*,', trans_options_str, 'video urls'), 'video urls')
+ title = self._html_search_regex(
+ r"title\s*:\s*'(.+?)'\s*,", options_str, 'title')
+ poster = self._html_search_regex(
+ r"poster\s*:\s*'(.+?)'\s*,", options_str, 'poster', default=None)
+
+ if video_urls.get('youtube'):
+ return self.url_result(video_urls.get('youtube'), 'Youtube')
+
+ formats = []
+ for video_type, api_url in video_urls.items():
+ if not api_url:
+ continue
+
+ video_url = self._download_webpage(
+ compat_urlparse.urljoin(url, api_url), video_id,
+ note='Retrieving URL for %s video' % video_type)
+
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, ext='mp4', m3u8_id='hls'))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ video_url, video_id, f4m_id='hds'))
+ else:
+ mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+)\.mp4', video_url)
+ a_format = {
+ 'url': video_url,
+ # video_type may be 'mp4', which confuses YoutubeDL
+ 'format_id': 'http-' + video_type,
+ }
+ if mobj:
+ a_format.update({
+ 'height': int_or_none(mobj.group('height')),
+ 'tbr': int_or_none(mobj.group('tbr')),
+ })
+ formats.append(a_format)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnail': poster,
+ }
diff --git a/hypervideo_dl/extractor/ufctv.py b/hypervideo_dl/extractor/ufctv.py
new file mode 100644
index 0000000..3d74ba0
--- /dev/null
+++ b/hypervideo_dl/extractor/ufctv.py
@@ -0,0 +1,16 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .imggaming import ImgGamingBaseIE
+
+
+class UFCTVIE(ImgGamingBaseIE):
+ _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?(?:ufc\.tv|(?:ufc)?fightpass\.com)|ufcfightpass\.img(?:dge|gaming)\.com'
+ _NETRC_MACHINE = 'ufctv'
+ _REALM = 'ufc'
+
+
+class UFCArabiaIE(ImgGamingBaseIE):
+ _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?ufcarabia\.(?:ae|com)'
+ _NETRC_MACHINE = 'ufcarabia'
+ _REALM = 'admufc'
diff --git a/hypervideo_dl/extractor/uktvplay.py b/hypervideo_dl/extractor/uktvplay.py
new file mode 100644
index 0000000..f28fd51
--- /dev/null
+++ b/hypervideo_dl/extractor/uktvplay.py
@@ -0,0 +1,36 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class UKTVPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
+ 'info_dict': {
+ 'id': '2117008346001',
+ 'ext': 'mp4',
+ 'title': 'Pincers',
+ 'description': 'Pincers',
+ 'uploader_id': '1242911124001',
+ 'upload_date': '20130124',
+ 'timestamp': 1359049267,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download MPD manifest']
+ }, {
+ 'url': 'https://uktvplay.uktv.co.uk/shows/africa/watch-online/5983349675001',
+ 'only_matching': True,
+ }]
+ # BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/1242911124001/OrCyvJ2gyL_default/index.html?videoId=%s'
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % video_id,
+ 'BrightcoveNew', video_id)
diff --git a/hypervideo_dl/extractor/umg.py b/hypervideo_dl/extractor/umg.py
new file mode 100644
index 0000000..d815cd9
--- /dev/null
+++ b/hypervideo_dl/extractor/umg.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_filesize,
+ parse_iso8601,
+)
+
+
+class UMGDeIE(InfoExtractor):
+ IE_NAME = 'umg:de'
+ IE_DESC = 'Universal Music Deutschland'
+ _VALID_URL = r'https?://(?:www\.)?universal-music\.de/[^/]+/videos/[^/?#]+-(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.universal-music.de/sido/videos/jedes-wort-ist-gold-wert-457803',
+ 'md5': 'ebd90f48c80dcc82f77251eb1902634f',
+ 'info_dict': {
+ 'id': '457803',
+ 'ext': 'mp4',
+ 'title': 'Jedes Wort ist Gold wert',
+ 'timestamp': 1513591800,
+ 'upload_date': '20171218',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'https://api.universal-music.de/graphql',
+ video_id, query={
+ 'query': '''{
+ universalMusic(channel:16) {
+ video(id:%s) {
+ headline
+ formats {
+ formatId
+ url
+ type
+ width
+ height
+ mimeType
+ fileSize
+ }
+ duration
+ createdDate
+ }
+ }
+}''' % video_id})['data']['universalMusic']['video']
+
+ title = video_data['headline']
+ hls_url_template = 'http://mediadelivery.universal-music-services.de/vod/mp4:autofill/storage/' + '/'.join(list(video_id)) + '/content/%s/file/playlist.m3u8'
+
+ thumbnails = []
+ formats = []
+
+ def add_m3u8_format(format_id):
+ m3u8_formats = self._extract_m3u8_formats(
+ hls_url_template % format_id, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ if m3u8_formats and m3u8_formats[0].get('height'):
+ formats.extend(m3u8_formats)
+
+ for f in video_data.get('formats', []):
+ f_url = f.get('url')
+ mime_type = f.get('mimeType')
+ if not f_url or mime_type == 'application/mxf':
+ continue
+ fmt = {
+ 'url': f_url,
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'filesize': parse_filesize(f.get('fileSize')),
+ }
+ f_type = f.get('type')
+ if f_type == 'Image':
+ thumbnails.append(fmt)
+ elif f_type == 'Video':
+ format_id = f.get('formatId')
+ if format_id:
+ fmt['format_id'] = format_id
+ if mime_type == 'video/mp4':
+ add_m3u8_format(format_id)
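+ # Probe the first byte to weed out broken progressive URLs: FLV files
+ # start with 'F' and an MP4 ftyp box length starts with 0x00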
+ urlh = self._request_webpage(f_url, video_id, fatal=False)
+ if urlh:
+ first_byte = urlh.read(1)
+ if first_byte not in (b'F', b'\x00'):
+ continue
+ formats.append(fmt)
+ if not formats:
+ for format_id in (867, 836, 940):
+ add_m3u8_format(format_id)
+ self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'duration': int_or_none(video_data.get('duration')),
+ 'timestamp': parse_iso8601(video_data.get('createdDate'), ' '),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/unistra.py b/hypervideo_dl/extractor/unistra.py
new file mode 100644
index 0000000..a724cdb
--- /dev/null
+++ b/hypervideo_dl/extractor/unistra.py
@@ -0,0 +1,67 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import qualities
+
+
+class UnistraIE(InfoExtractor):
+ _VALID_URL = r'https?://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
+
+ _TESTS = [
+ {
+ 'url': 'http://utv.unistra.fr/video.php?id_video=154',
+ 'md5': '736f605cfdc96724d55bb543ab3ced24',
+ 'info_dict': {
+ 'id': '154',
+ 'ext': 'mp4',
+ 'title': 'M!ss Yella',
+ 'description': 'md5:104892c71bd48e55d70b902736b81bbf',
+ },
+ },
+ {
+ 'url': 'http://utv.unistra.fr/index.php?id_video=437',
+ 'md5': '1ddddd6cccaae76f622ce29b8779636d',
+ 'info_dict': {
+ 'id': '437',
+ 'ext': 'mp4',
+ 'title': 'Prix Louise Weiss 2014',
+ 'description': 'md5:cc3a8735f079f4fb6b0b570fc10c135a',
+ },
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage))
+
+ quality = qualities(['SD', 'HD'])
+ formats = []
+ for file_path in files:
+ format_id = 'HD' if file_path.endswith('-HD.mp4') else 'SD'
+ formats.append({
+ 'url': 'http://vod-flash.u-strasbg.fr:8080%s' % file_path,
+ 'format_id': format_id,
+ 'quality': quality(format_id)
+ })
+ self._sort_formats(formats)
+
+ title = self._html_search_regex(
+ r'<title>UTV - (.*?)</', webpage, 'title')
+ description = self._html_search_regex(
+ r'<meta name="Description" content="(.*?)"', webpage, 'description', flags=re.DOTALL)
+ thumbnail = self._search_regex(
+ r'image: "(.*?)"', webpage, 'thumbnail')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats
+ }
diff --git a/hypervideo_dl/extractor/unity.py b/hypervideo_dl/extractor/unity.py
new file mode 100644
index 0000000..73daacf
--- /dev/null
+++ b/hypervideo_dl/extractor/unity.py
@@ -0,0 +1,32 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+
+
+class UnityIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?unity3d\.com/learn/tutorials/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://unity3d.com/learn/tutorials/topics/animation/animate-anything-mecanim',
+ 'info_dict': {
+ 'id': 'jWuNtik0C8E',
+ 'ext': 'mp4',
+ 'title': 'Live Training 22nd September 2014 - Animate Anything',
+ 'description': 'md5:e54913114bd45a554c56cdde7669636e',
+ 'duration': 2893,
+ 'uploader': 'Unity',
+ 'uploader_id': 'Unity3D',
+ 'upload_date': '20140926',
+ }
+ }, {
+ 'url': 'https://unity3d.com/learn/tutorials/projects/2d-ufo-tutorial/following-player-camera?playlist=25844',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ youtube_id = self._search_regex(
+ r'data-video-id="([_0-9a-zA-Z-]+)"',
+ webpage, 'youtube ID')
+ return self.url_result(youtube_id, ie=YoutubeIE.ie_key(), video_id=video_id)
diff --git a/hypervideo_dl/extractor/uol.py b/hypervideo_dl/extractor/uol.py
new file mode 100644
index 0000000..628adf2
--- /dev/null
+++ b/hypervideo_dl/extractor/uol.py
@@ -0,0 +1,144 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_urlencode,
+)
+from ..utils import (
+ clean_html,
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ qualities,
+ update_url_query,
+)
+
+
+class UOLIE(InfoExtractor):
+ IE_NAME = 'uol.com.br'
+ _VALID_URL = r'https?://(?:.+?\.)?uol\.com\.br/.*?(?:(?:mediaId|v)=|view/(?:[a-z0-9]+/)?|video(?:=|/(?:\d{4}/\d{2}/\d{2}/)?))(?P<id>\d+|[\w-]+-[A-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://player.mais.uol.com.br/player_video_v3.swf?mediaId=15951931',
+ 'md5': '4f1e26683979715ff64e4e29099cf020',
+ 'info_dict': {
+ 'id': '15951931',
+ 'ext': 'mp4',
+ 'title': 'Miss simpatia é encontrada morta',
+ 'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2',
+ 'timestamp': 1470421860,
+ 'upload_date': '20160805',
+ }
+ }, {
+ 'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
+ 'md5': '2850a0e8dfa0a7307e04a96c5bdc5bc2',
+ 'info_dict': {
+ 'id': '15954259',
+ 'ext': 'mp4',
+ 'title': 'Incêndio destrói uma das maiores casas noturnas de Londres',
+ 'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. Não há informações sobre vítimas.',
+ 'timestamp': 1470674520,
+ 'upload_date': '20160808',
+ }
+ }, {
+ 'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mais.uol.com.br/view/15954259',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://noticias.band.uol.com.br/brasilurgente/video/2016/08/05/15951931/miss-simpatia-e-encontrada-morta.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videos.band.uol.com.br/programa.asp?e=noticias&pr=brasil-urgente&v=15951931&t=Policia-desmonte-base-do-PCC-na-Cracolandia',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mais.uol.com.br/view/cphaa0gl2x8r/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://noticias.uol.com.br//videos/assistir.htm?video=rafaela-silva-inspira-criancas-no-judo-04024D983968D4C95326',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mais.uol.com.br/view/e0qbgxid79uv/15275470',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video_data = self._download_json(
+ # https://api.mais.uol.com.br/apiuol/v4/player/data/[MEDIA_ID]
+ 'https://api.mais.uol.com.br/apiuol/v3/media/detail/' + video_id,
+ video_id)['item']
+ media_id = compat_str(video_data['mediaId'])
+ title = video_data['title']
+ ver = video_data.get('revision', 2)
+
+ uol_formats = self._download_json(
+ 'https://croupier.mais.uol.com.br/v3/formats/%s/jsonp' % media_id,
+ media_id)
+ quality = qualities(['mobile', 'WEBM', '360p', '720p', '1080p'])
+ formats = []
+ for format_id, f in uol_formats.items():
+ if not isinstance(f, dict):
+ continue
+ f_url = f.get('url') or f.get('secureUrl')
+ if not f_url:
+ continue
+ query = {
+ 'ver': ver,
+ 'r': 'http://mais.uol.com.br',
+ }
+ for k in ('token', 'sign'):
+ v = f.get(k)
+ if v:
+ query[k] = v
+ f_url = update_url_query(f_url, query)
+ if format_id == 'HLS':
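+ # The token/sign query must be carried over to every fragment URL,
+ # hence extra_param_to_segment_url below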
+ m3u8_formats = self._extract_m3u8_formats(
+ f_url, media_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ encoded_query = compat_urllib_parse_urlencode(query)
+ for m3u8_f in m3u8_formats:
+ m3u8_f['extra_param_to_segment_url'] = encoded_query
+ m3u8_f['url'] = update_url_query(m3u8_f['url'], query)
+ formats.extend(m3u8_formats)
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': f_url,
+ 'quality': quality(format_id),
+ 'preference': -1,
+ })
+ self._sort_formats(formats)
+
+ tags = []
+ for tag in video_data.get('tags', []):
+ tag_description = tag.get('description')
+ if not tag_description:
+ continue
+ tags.append(tag_description)
+
+ thumbnails = []
+ for q in ('Small', 'Medium', 'Wmedium', 'Large', 'Wlarge', 'Xlarge'):
+ q_url = video_data.get('thumb' + q)
+ if not q_url:
+ continue
+ thumbnails.append({
+ 'id': q,
+ 'url': q_url,
+ })
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'description': clean_html(video_data.get('description')),
+ 'thumbnails': thumbnails,
+ 'duration': parse_duration(video_data.get('duration')),
+ 'tags': tags,
+ 'formats': formats,
+ 'timestamp': parse_iso8601(video_data.get('publishDate'), ' '),
+ 'view_count': int_or_none(video_data.get('viewsQtty')),
+ }
diff --git a/hypervideo_dl/extractor/uplynk.py b/hypervideo_dl/extractor/uplynk.py
new file mode 100644
index 0000000..f06bf5b
--- /dev/null
+++ b/hypervideo_dl/extractor/uplynk.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ ExtractorError,
+)
+
+
+class UplynkIE(InfoExtractor):
+ IE_NAME = 'uplynk'
+ _VALID_URL = r'https?://.*?\.uplynk\.com/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.(?:m3u8|json)(?:.*?\bpbs=(?P<session_id>[^&]+))?'
+ _TEST = {
+ 'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8',
+ 'info_dict': {
+ 'id': 'e89eaf2ce9054aa89d92ddb2d817a52e',
+ 'ext': 'mp4',
+ 'title': '030816-kgo-530pm-solar-eclipse-vid_web.mp4',
+ 'uploader_id': '4413701bf5a1488db55b767f8ae9d4fa',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _extract_uplynk_info(self, uplynk_content_url):
+ path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups()
+ display_id = video_id or external_id
+ formats = self._extract_m3u8_formats(
+ 'http://content.uplynk.com/%s.m3u8' % path,
+ display_id, 'mp4', 'm3u8_native')
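+ # The pbs session id, when present, must also be carried on every
+ # segment request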
+ if session_id:
+ for f in formats:
+ f['extra_param_to_segment_url'] = 'pbs=' + session_id
+ self._sort_formats(formats)
+ asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id)
+ if asset.get('error') == 1:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, asset['msg']), expected=True)
+
+ return {
+ 'id': asset['asset'],
+ 'title': asset['desc'],
+ 'thumbnail': asset.get('default_poster_url'),
+ 'duration': float_or_none(asset.get('duration')),
+ 'uploader_id': asset.get('owner'),
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ return self._extract_uplynk_info(url)
+
+
+class UplynkPreplayIE(UplynkIE):
+ IE_NAME = 'uplynk:preplay'
+ _VALID_URL = r'https?://.*?\.uplynk\.com/preplay2?/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.json'
+ _TEST = None
+
+ def _real_extract(self, url):
+ path, external_id, video_id = re.match(self._VALID_URL, url).groups()
+ display_id = video_id or external_id
+ preplay = self._download_json(url, display_id)
+ content_url = 'http://content.uplynk.com/%s.m3u8' % path
+ session_id = preplay.get('sid')
+ if session_id:
+ content_url += '?pbs=' + session_id
+ return self._extract_uplynk_info(content_url)
diff --git a/hypervideo_dl/extractor/urort.py b/hypervideo_dl/extractor/urort.py
new file mode 100644
index 0000000..8f6edab
--- /dev/null
+++ b/hypervideo_dl/extractor/urort.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse,
+)
+from ..utils import (
+ unified_strdate,
+)
+
+
+class UrortIE(InfoExtractor):
+ IE_DESC = 'NRK P3 Urørt'
+ _VALID_URL = r'https?://(?:www\.)?urort\.p3\.no/#!/Band/(?P<id>[^/]+)$'
+
+ _TEST = {
+ 'url': 'https://urort.p3.no/#!/Band/Gerilja',
+ 'md5': '5ed31a924be8a05e47812678a86e127b',
+ 'info_dict': {
+ 'id': '33124-24',
+ 'ext': 'mp3',
+ 'title': 'The Bomb',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'uploader': 'Gerilja',
+ 'uploader_id': 'Gerilja',
+ 'upload_date': '20100323',
+ },
+ 'params': {
+ 'matchtitle': '^The Bomb$', # limit the test to a single track
+ }
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
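+ # Urørt exposes its track metadata through an OData-style endpoint;
+ # build the $filter expression for this band by hand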
+ fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id)
+ json_url = 'http://urort.p3.no/breeze/urort/TrackDTOViews?$filter=%s&$orderby=Released%%20desc&$expand=Tags%%2CFiles' % fstr
+ songs = self._download_json(json_url, playlist_id)
+ entries = []
+ for s in songs:
+ formats = [{
+ 'tbr': f.get('Quality'),
+ 'ext': f['FileType'],
+ 'format_id': '%s-%s' % (f['FileType'], f.get('Quality', '')),
+ 'url': 'http://p3urort.blob.core.windows.net/tracks/%s' % f['FileRef'],
+ 'preference': 3 if f['FileType'] == 'mp3' else 2,
+ } for f in s['Files']]
+ self._sort_formats(formats)
+ e = {
+ 'id': '%d-%s' % (s['BandId'], s['$id']),
+ 'title': s['Title'],
+ 'uploader_id': playlist_id,
+ 'uploader': s.get('BandName', playlist_id),
+ 'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'],
+ 'upload_date': unified_strdate(s.get('Released')),
+ 'formats': formats,
+ }
+ entries.append(e)
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': playlist_id,
+ 'entries': entries,
+ }
diff --git a/hypervideo_dl/extractor/urplay.py b/hypervideo_dl/extractor/urplay.py
new file mode 100644
index 0000000..d6c7914
--- /dev/null
+++ b/hypervideo_dl/extractor/urplay.py
@@ -0,0 +1,107 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ dict_get,
+ int_or_none,
+ unified_timestamp,
+)
+
+
+class URPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand',
+ 'md5': 'ff5b0c89928f8083c74bbd5099c9292d',
+ 'info_dict': {
+ 'id': '203704',
+ 'ext': 'mp4',
+ 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd',
+ 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
+ 'timestamp': 1513292400,
+ 'upload_date': '20171214',
+ 'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
+ 'duration': 2269,
+ 'categories': ['Kultur & historia'],
+ 'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
+ 'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
+ },
+ }, {
+ 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
+ 'info_dict': {
+ 'id': '190031',
+ 'ext': 'mp4',
+ 'title': 'Tripp, Trapp, Träd : Sovkudde',
+ 'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
+ 'timestamp': 1440086400,
+ 'upload_date': '20150820',
+ 'series': 'Tripp, Trapp, Träd',
+ 'duration': 865,
+ 'tags': ['Sova'],
+ 'episode': 'Sovkudde',
+ },
+ }, {
+ 'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ url = url.replace('skola.se/Produkter', 'play.se/program')
+ webpage = self._download_webpage(url, video_id)
+ vid = int(video_id)
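+ # Episode metadata is embedded as JSON in the data-react-props
+ # attribute of the React program container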
+ accessible_episodes = self._parse_json(self._html_search_regex(
+ r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
+ webpage, 'urplayer data'), video_id)['accessibleEpisodes']
+ urplayer_data = next(e for e in accessible_episodes if e.get('id') == vid)
+ episode = urplayer_data['title']
+ raw_streaming_info = urplayer_data['streamingInfo']['raw']
+ host = self._download_json(
+ 'http://streaming-loadbalancer.ur.se/loadbalancer.json',
+ video_id)['redirect']
+
+ formats = []
+ for k, v in raw_streaming_info.items():
+ if not (k in ('sd', 'hd') and isinstance(v, dict)):
+ continue
+ file_http = v.get('location')
+ if file_http:
+ formats.extend(self._extract_wowza_formats(
+ 'http://%s/%splaylist.m3u8' % (host, file_http),
+ video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
+ self._sort_formats(formats)
+
+ image = urplayer_data.get('image') or {}
+ thumbnails = []
+ for k, v in image.items():
+ t = {
+ 'id': k,
+ 'url': v,
+ }
+ wh = k.split('x')
+ if len(wh) == 2:
+ t.update({
+ 'width': int_or_none(wh[0]),
+ 'height': int_or_none(wh[1]),
+ })
+ thumbnails.append(t)
+
+ series = urplayer_data.get('series') or {}
+ series_title = dict_get(series, ('seriesTitle', 'title')) or dict_get(urplayer_data, ('seriesTitle', 'mainTitle'))
+
+ return {
+ 'id': video_id,
+ 'title': '%s : %s' % (series_title, episode) if series_title else episode,
+ 'description': urplayer_data.get('description'),
+ 'thumbnails': thumbnails,
+ 'timestamp': unified_timestamp(urplayer_data.get('publishedAt')),
+ 'series': series_title,
+ 'formats': formats,
+ 'duration': int_or_none(urplayer_data.get('duration')),
+ 'categories': urplayer_data.get('categories'),
+ 'tags': urplayer_data.get('keywords'),
+ 'season': series.get('label'),
+ 'episode': episode,
+ 'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
+ }
diff --git a/hypervideo_dl/extractor/usanetwork.py b/hypervideo_dl/extractor/usanetwork.py
new file mode 100644
index 0000000..e3784e5
--- /dev/null
+++ b/hypervideo_dl/extractor/usanetwork.py
@@ -0,0 +1,24 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .nbc import NBCIE
+
+
+class USANetworkIE(NBCIE):
+ _VALID_URL = r'https?(?P<permalink>://(?:www\.)?usanetwork\.com/[^/]+/video/[^/]+/(?P<id>\d+))'
+ _TESTS = [{
+ 'url': 'https://www.usanetwork.com/peacock-trailers/video/intelligence-trailer/4185302',
+ 'info_dict': {
+ 'id': '4185302',
+ 'ext': 'mp4',
+ 'title': 'Intelligence (Trailer)',
+ 'description': 'A maverick NSA agent enlists the help of a junior systems analyst in a workplace power grab.',
+ 'upload_date': '20200715',
+ 'timestamp': 1594785600,
+ 'uploader': 'NBCU-MPAT',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
diff --git a/hypervideo_dl/extractor/usatoday.py b/hypervideo_dl/extractor/usatoday.py
new file mode 100644
index 0000000..b210344
--- /dev/null
+++ b/hypervideo_dl/extractor/usatoday.py
@@ -0,0 +1,63 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ get_element_by_attribute,
+ parse_duration,
+ try_get,
+ update_url_query,
+)
+from ..compat import compat_str
+
+
+class USATodayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?usatoday\.com/(?:[^/]+/)*(?P<id>[^?/#]+)'
+ _TESTS = [{
+ # Brightcove Partner ID = 29906170001
+ 'url': 'http://www.usatoday.com/media/cinematic/video/81729424/us-france-warn-syrian-regime-ahead-of-new-peace-talks/',
+ 'md5': '033587d2529dc3411a1ab3644c3b8827',
+ 'info_dict': {
+ 'id': '4799374959001',
+ 'ext': 'mp4',
+ 'title': 'US, France warn Syrian regime ahead of new peace talks',
+ 'timestamp': 1457891045,
+ 'description': 'md5:7e50464fdf2126b0f533748d3c78d58f',
+ 'uploader_id': '29906170001',
+ 'upload_date': '20160313',
+ }
+ }, {
+ # ui-video-data[asset_metadata][items][brightcoveaccount] = 28911775001
+ 'url': 'https://www.usatoday.com/story/tech/science/2018/08/21/yellowstone-supervolcano-eruption-stop-worrying-its-blow/973633002/',
+ 'info_dict': {
+ 'id': '5824495846001',
+ 'ext': 'mp4',
+ 'title': 'Yellowstone more likely to crack rather than explode',
+ 'timestamp': 1534790612,
+ 'description': 'md5:3715e7927639a4f16b474e9391687c62',
+ 'uploader_id': '28911775001',
+ 'upload_date': '20180820',
+ }
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(update_url_query(url, {'ajax': 'true'}), display_id)
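+ # The ajax=true variant of the page carries the asset metadata in a
+ # ui-video-data element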
+ ui_video_data = get_element_by_attribute('class', 'ui-video-data', webpage)
+ if not ui_video_data:
+ raise ExtractorError('no video on the webpage', expected=True)
+ video_data = self._parse_json(ui_video_data, display_id)
+ item = try_get(video_data, lambda x: x['asset_metadata']['items'], dict) or {}
+
+ return {
+ '_type': 'url_transparent',
+ 'url': self.BRIGHTCOVE_URL_TEMPLATE % (item.get('brightcoveaccount', '29906170001'), item.get('brightcoveid') or video_data['brightcove_id']),
+ 'id': compat_str(video_data['id']),
+ 'title': video_data['title'],
+ 'thumbnail': video_data.get('thumbnail'),
+ 'description': video_data.get('description'),
+ 'duration': parse_duration(video_data.get('length')),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/hypervideo_dl/extractor/ustream.py b/hypervideo_dl/extractor/ustream.py
new file mode 100644
index 0000000..1e29cbe
--- /dev/null
+++ b/hypervideo_dl/extractor/ustream.py
@@ -0,0 +1,284 @@
+from __future__ import unicode_literals
+
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ encode_data_uri,
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ mimetype2ext,
+ str_or_none,
+)
+
+
+class UstreamIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
+ IE_NAME = 'ustream'
+ _TESTS = [{
+ 'url': 'http://www.ustream.tv/recorded/20274954',
+ 'md5': '088f151799e8f572f84eb62f17d73e5c',
+ 'info_dict': {
+ 'id': '20274954',
+ 'ext': 'flv',
+ 'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
+ 'description': 'Young Americans for Liberty February 7, 2012 2:28 AM',
+ 'timestamp': 1328577035,
+ 'upload_date': '20120207',
+ 'uploader': 'yaliberty',
+ 'uploader_id': '6780869',
+ },
+ }, {
+ # From http://sportscanada.tv/canadagames/index.php/week2/figure-skating/444
+ # Title and uploader available only from params JSON
+ 'url': 'http://www.ustream.tv/embed/recorded/59307601?ub=ff0000&lc=ff0000&oc=ffffff&uc=ffffff&v=3&wmode=direct',
+ 'md5': '5a2abf40babeac9812ed20ae12d34e10',
+ 'info_dict': {
+ 'id': '59307601',
+ 'ext': 'flv',
+ 'title': '-CG11- Canada Games Figure Skating',
+ 'uploader': 'sportscanadatv',
+ },
+ 'skip': 'This Pro Broadcaster has chosen to remove this video from the ustream.tv site.',
+ }, {
+ 'url': 'http://www.ustream.tv/embed/10299409',
+ 'info_dict': {
+ 'id': '10299409',
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'http://www.ustream.tv/recorded/91343263',
+ 'info_dict': {
+ 'id': '91343263',
+ 'ext': 'mp4',
+ 'title': 'GitHub Universe - General Session - Day 1',
+ 'upload_date': '20160914',
+ 'description': 'GitHub Universe - General Session - Day 1',
+ 'timestamp': 1473872730,
+ 'uploader': 'wa0dnskeqkr',
+ 'uploader_id': '38977840',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ }, {
+ 'url': 'https://video.ibm.com/embed/recorded/128240221?&autoplay=true&controls=true&volume=100',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage)
+ if mobj is not None:
+ return mobj.group('url')
+
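+ # Resolving a recorded stream takes two round trips: a random
+ # load-balanced host hands back the host/connectionId that serves the
+ # actual stream info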
+ def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None):
+ def num_to_hex(n):
+ return hex(n)[2:]
+
+ rnd = random.randrange
+
+ if not extra_note:
+ extra_note = ''
+
+ conn_info = self._download_json(
+ 'http://r%d-1-%s-recorded-lp-live.ums.ustream.tv/1/ustream' % (rnd(1e8), video_id),
+ video_id, note='Downloading connection info' + extra_note,
+ query={
+ 'type': 'viewer',
+ 'appId': app_id_ver[0],
+ 'appVersion': app_id_ver[1],
+ 'rsid': '%s:%s' % (num_to_hex(rnd(1e8)), num_to_hex(rnd(1e8))),
+ 'rpin': '_rpin.%d' % rnd(1e15),
+ 'referrer': url,
+ 'media': video_id,
+ 'application': 'recorded',
+ })
+ host = conn_info[0]['args'][0]['host']
+ connection_id = conn_info[0]['args'][0]['connectionId']
+
+ return self._download_json(
+ 'http://%s/1/ustream?connectionId=%s' % (host, connection_id),
+ video_id, note='Downloading stream info' + extra_note)
+
+ def _get_streams(self, url, video_id, app_id_ver):
+ # Sometimes the return dict does not have 'stream'
+ for trial_count in range(3):
+ stream_info = self._get_stream_info(
+ url, video_id, app_id_ver,
+ extra_note=' (try %d)' % (trial_count + 1) if trial_count > 0 else '')
+ if 'stream' in stream_info[0]['args'][0]:
+ return stream_info[0]['args'][0]['stream']
+ return []
+
+ def _parse_segmented_mp4(self, dash_stream_info):
+ def resolve_dash_template(template, idx, chunk_hash):
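+ # the template carries '%' placeholders: the first is replaced with
+ # the chunk index, any remaining ones with the chunk hash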
+ return template.replace('%', compat_str(idx), 1).replace('%', chunk_hash)
+
+ formats = []
+ for stream in dash_stream_info['streams']:
+ # Use only one provider to avoid too many formats
+ provider = dash_stream_info['providers'][0]
+ fragments = [{
+ 'url': resolve_dash_template(
+ provider['url'] + stream['initUrl'], 0, dash_stream_info['hashes']['0'])
+ }]
+ for idx in range(dash_stream_info['videoLength'] // dash_stream_info['chunkTime']):
+ fragments.append({
+ 'url': resolve_dash_template(
+ provider['url'] + stream['segmentUrl'], idx,
+ dash_stream_info['hashes'][compat_str(idx // 10 * 10)])
+ })
+ content_type = stream['contentType']
+ kind = content_type.split('/')[0]
+ f = {
+ 'format_id': '-'.join(filter(None, [
+ 'dash', kind, str_or_none(stream.get('bitrate'))])),
+ 'protocol': 'http_dash_segments',
+ # TODO: generate a MPD doc for external players?
+ 'url': encode_data_uri(b'<MPD/>', 'text/xml'),
+ 'ext': mimetype2ext(content_type),
+ 'height': stream.get('height'),
+ 'width': stream.get('width'),
+ 'fragments': fragments,
+ }
+ if kind == 'video':
+ f.update({
+ 'vcodec': stream.get('codec'),
+ 'acodec': 'none',
+ 'vbr': stream.get('bitrate'),
+ })
+ else:
+ f.update({
+ 'vcodec': 'none',
+ 'acodec': stream.get('codec'),
+ 'abr': stream.get('bitrate'),
+ })
+ formats.append(f)
+ return formats
+
+ def _real_extract(self, url):
+ m = re.match(self._VALID_URL, url)
+ video_id = m.group('id')
+
+ # some sites use this embed format (see: https://github.com/ytdl-org/youtube-dl/issues/2990)
+ if m.group('type') == 'embed/recorded':
+ desktop_url = 'http://www.ustream.tv/recorded/' + video_id
+ return self.url_result(desktop_url, 'Ustream')
+ if m.group('type') == 'embed':
+ webpage = self._download_webpage(url, video_id)
+ content_video_ids = self._parse_json(self._search_regex(
+ r'ustream\.vars\.offAirContentVideoIds=([^;]+);', webpage,
+ 'content video IDs'), video_id)
+ return self.playlist_result(
+ [self.url_result('http://www.ustream.tv/recorded/' + u, 'Ustream')
+ for u in content_video_ids],
+ video_id)
+
+ params = self._download_json(
+ 'https://api.ustream.tv/videos/%s.json' % video_id, video_id)
+
+ error = params.get('error')
+ if error:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error), expected=True)
+
+ video = params['video']
+
+ title = video['title']
+ filesize = float_or_none(video.get('file_size'))
+
+ formats = [{
+ 'format_id': format_id,
+ 'url': video_url,
+ 'ext': format_id,
+ 'filesize': filesize,
+ } for format_id, video_url in video['media_urls'].items() if video_url]
+
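+ # fall back to the UMS stream API when the videos API exposes no direct media URLs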
+ if not formats:
+ hls_streams = self._get_streams(url, video_id, app_id_ver=(11, 2))
+ if hls_streams:
+ # m3u8_native leads to intermittent ContentTooShortError
+ formats.extend(self._extract_m3u8_formats(
+ hls_streams[0]['url'], video_id, ext='mp4', m3u8_id='hls'))
+
+ '''
+ # DASH streams handling is incomplete as 'url' is missing
+ dash_streams = self._get_streams(url, video_id, app_id_ver=(3, 1))
+ if dash_streams:
+ formats.extend(self._parse_segmented_mp4(dash_streams))
+ '''
+
+ self._sort_formats(formats)
+
+ description = video.get('description')
+ timestamp = int_or_none(video.get('created_at'))
+ duration = float_or_none(video.get('length'))
+ view_count = int_or_none(video.get('views'))
+
+ uploader = video.get('owner', {}).get('username')
+ uploader_id = video.get('owner', {}).get('id')
+
+ thumbnails = [{
+ 'id': thumbnail_id,
+ 'url': thumbnail_url,
+ } for thumbnail_id, thumbnail_url in video.get('thumbnail', {}).items()]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnails': thumbnails,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ }
+
+
+class UstreamChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ustream\.tv/channel/(?P<slug>.+)'
+ IE_NAME = 'ustream:channel'
+ _TEST = {
+ 'url': 'http://www.ustream.tv/channel/channeljapan',
+ 'info_dict': {
+ 'id': '10874166',
+ },
+ 'playlist_mincount': 17,
+ }
+
+ def _real_extract(self, url):
+ m = re.match(self._VALID_URL, url)
+ display_id = m.group('slug')
+ webpage = self._download_webpage(url, display_id)
+ channel_id = self._html_search_meta('ustream:channel_id', webpage)
+
+ BASE = 'http://www.ustream.tv'
+ next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
+ video_ids = []
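+ # the social stream endpoint is paginated; follow 'nextUrl' until it is empty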
+ while next_url:
+ reply = self._download_json(
+ compat_urlparse.urljoin(BASE, next_url), display_id,
+ note='Downloading video information (next: %d)' % (len(video_ids) + 1))
+ video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
+ next_url = reply['nextUrl']
+
+ entries = [
+ self.url_result('http://www.ustream.tv/recorded/' + vid, 'Ustream')
+ for vid in video_ids]
+ return {
+ '_type': 'playlist',
+ 'id': channel_id,
+ 'display_id': display_id,
+ 'entries': entries,
+ }
diff --git a/hypervideo_dl/extractor/ustudio.py b/hypervideo_dl/extractor/ustudio.py
new file mode 100644
index 0000000..56509be
--- /dev/null
+++ b/hypervideo_dl/extractor/ustudio.py
@@ -0,0 +1,125 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unified_strdate,
+ unescapeHTML,
+)
+
+
+class UstudioIE(InfoExtractor):
+ IE_NAME = 'ustudio'
+ _VALID_URL = r'https?://(?:(?:www|v1)\.)?ustudio\.com/video/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://ustudio.com/video/Uxu2my9bgSph/san_francisco_golden_gate_bridge',
+ 'md5': '58bbfca62125378742df01fc2abbdef6',
+ 'info_dict': {
+ 'id': 'Uxu2my9bgSph',
+ 'display_id': 'san_francisco_golden_gate_bridge',
+ 'ext': 'mp4',
+ 'title': 'San Francisco: Golden Gate Bridge',
+ 'description': 'md5:23925500697f2c6d4830e387ba51a9be',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20111107',
+ 'uploader': 'Tony Farley',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id, display_id = re.match(self._VALID_URL, url).groups()
+
+ config = self._download_xml(
+ 'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id,
+ display_id)
+
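+ # each <quality> node in the config XML carries <video> and <image>
+ # children; collect whichever kind is requested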
+ def extract(kind):
+ return [{
+ 'url': unescapeHTML(item.attrib['url']),
+ 'width': int_or_none(item.get('width')),
+ 'height': int_or_none(item.get('height')),
+ } for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')]
+
+ formats = extract('video')
+ self._sort_formats(formats)
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._og_search_title(webpage)
+ upload_date = unified_strdate(self._search_regex(
+ r'(?s)Uploaded by\s*.+?\s*on\s*<span>([^<]+)</span>',
+ webpage, 'upload date', fatal=False))
+ uploader = self._search_regex(
+ r'Uploaded by\s*<a[^>]*>([^<]+)<',
+ webpage, 'uploader', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'thumbnails': extract('image'),
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'formats': formats,
+ }
+
+
+class UstudioEmbedIE(InfoExtractor):
+ IE_NAME = 'ustudio:embed'
+ _VALID_URL = r'https?://(?:(?:app|embed)\.)?ustudio\.com/embed/(?P<uid>[^/]+)/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://app.ustudio.com/embed/DeN7VdYRDKhP/Uw7G1kMCe65T',
+ 'md5': '47c0be52a09b23a7f40de9469cec58f4',
+ 'info_dict': {
+ 'id': 'Uw7G1kMCe65T',
+ 'ext': 'mp4',
+ 'title': '5 Things IT Should Know About Video',
+ 'description': 'md5:93d32650884b500115e158c5677d25ad',
+ 'uploader_id': 'DeN7VdYRDKhP',
+ }
+ }
+
+ def _real_extract(self, url):
+ uploader_id, video_id = re.match(self._VALID_URL, url).groups()
+ video_data = self._download_json(
+ 'http://app.ustudio.com/embed/%s/%s/config.json' % (uploader_id, video_id),
+ video_id)['videos'][0]
+ title = video_data['name']
+
+ formats = []
+ for ext, qualities in video_data.get('transcodes', {}).items():
+ for quality in qualities:
+ quality_url = quality.get('url')
+ if not quality_url:
+ continue
+ height = int_or_none(quality.get('height'))
+ formats.append({
+ 'format_id': '%s-%dp' % (ext, height) if height else ext,
+ 'url': quality_url,
+ 'width': int_or_none(quality.get('width')),
+ 'height': height,
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for image in video_data.get('images', []):
+ image_url = image.get('url')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'uploader_id': uploader_id,
+ 'tags': video_data.get('keywords'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/varzesh3.py b/hypervideo_dl/extractor/varzesh3.py
new file mode 100644
index 0000000..f474ed7
--- /dev/null
+++ b/hypervideo_dl/extractor/varzesh3.py
@@ -0,0 +1,79 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlparse,
+ compat_parse_qs,
+)
+from ..utils import (
+ clean_html,
+ remove_start,
+)
+
+
+class Varzesh3IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?'
+ _TESTS = [{
+ 'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/',
+ 'md5': '2a933874cb7dce4366075281eb49e855',
+ 'info_dict': {
+ 'id': '76337',
+ 'ext': 'mp4',
+ 'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا',
+ 'description': 'فصل ۲۰۱۵-۲۰۱۴',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'HTTP 404 Error',
+ }, {
+ 'url': 'http://video.varzesh3.com/video/112785/%D8%AF%D9%84%D9%87-%D8%B9%D9%84%DB%8C%D8%9B-%D8%B3%D8%AA%D8%A7%D8%B1%D9%87-%D9%86%D9%88%D8%B8%D9%87%D9%88%D8%B1-%D9%84%DB%8C%DA%AF-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AC%D8%B2%DB%8C%D8%B1%D9%87',
+ 'md5': '841b7cd3afbc76e61708d94e53a4a4e7',
+ 'info_dict': {
+ 'id': '112785',
+ 'ext': 'mp4',
+ 'title': 'دله علی؛ ستاره نوظهور لیگ برتر جزیره',
+ 'description': 'فوتبال 120',
+ },
+ 'expected_warnings': ['description'],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_url = self._search_regex(
+ r'<source[^>]+src="([^"]+)"', webpage, 'video url')
+
+ title = remove_start(self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title'), 'ویدیو ورزش 3 | ')
+
+ description = self._html_search_regex(
+ r'(?s)<div class="matn">(.+?)</div>',
+ webpage, 'description', default=None)
+ if description is None:
+ description = clean_html(self._html_search_meta('description', webpage))
+
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+ if thumbnail is None:
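+ # as a last resort, recover the thumbnail from the Facebook sharer link parameters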
+ fb_sharer_url = self._search_regex(
+ r'<a[^>]+href="(https?://www\.facebook\.com/sharer/sharer\.php\?[^"]+)"',
+ webpage, 'facebook sharer URL', fatal=False)
+ if fb_sharer_url:
+ sharer_params = compat_parse_qs(compat_urllib_parse_urlparse(fb_sharer_url).query)
+ thumbnail = sharer_params.get('p[images][0]', [None])[0]
+
+ video_id = self._search_regex(
+ r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'",
+ webpage, display_id, default=None)
+ if video_id is None:
+ video_id = self._search_regex(
+ r'var\s+VideoId\s*=\s*(\d+);', webpage, 'video id',
+ default=display_id)
+
+ return {
+ 'url': video_url,
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/vbox7.py b/hypervideo_dl/extractor/vbox7.py
new file mode 100644
index 0000000..8152ace
--- /dev/null
+++ b/hypervideo_dl/extractor/vbox7.py
@@ -0,0 +1,105 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class Vbox7IE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:[^/]+\.)?vbox7\.com/
+ (?:
+ play:|
+ (?:
+ emb/external\.php|
+ player/ext\.swf
+ )\?.*?\bvid=
+ )
+ (?P<id>[\da-fA-F]+)
+ '''
+ _GEO_COUNTRIES = ['BG']
+ _TESTS = [{
+ 'url': 'http://vbox7.com/play:0946fff23c',
+ 'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
+ 'info_dict': {
+ 'id': '0946fff23c',
+ 'ext': 'mp4',
+ 'title': 'Борисов: Притеснен съм за бъдещето на България',
+ 'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1470982814,
+ 'upload_date': '20160812',
+ 'uploader': 'zdraveibulgaria',
+ },
+ 'params': {
+ 'proxy': '127.0.0.1:8118',
+ },
+ }, {
+ 'url': 'http://vbox7.com/play:249bb972c2',
+ 'md5': '99f65c0c9ef9b682b97313e052734c3f',
+ 'info_dict': {
+ 'id': '249bb972c2',
+ 'ext': 'mp4',
+ 'title': 'Смях! Чудо - чист за секунди - Скрита камера',
+ },
+ 'skip': 'georestricted',
+ }, {
+ 'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://i49.vbox7.com/player/ext.swf?vid=0946fff23c&autoplay=1',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)',
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ response = self._download_json(
+ 'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
+ video_id)
+
+ if 'error' in response:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
+
+ video = response['options']
+
+ title = video['title']
+ video_url = video['src']
+
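+ # the API appears to return a placeholder '/na.mp4' URL for geo-blocked videos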
+ if '/na.mp4' in video_url:
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+
+ uploader = video.get('uploader')
+
+ webpage = self._download_webpage(
+ 'http://vbox7.com/play:%s' % video_id, video_id, fatal=False)
+
+ info = {}
+
+ if webpage:
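+ # the page ships malformed JSON-LD ('"/*@context"'); patch it before parsing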
+ info = self._search_json_ld(
+ webpage.replace('"/*@context"', '"@context"'), video_id,
+ fatal=False)
+
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'uploader': uploader,
+ 'thumbnail': self._proto_relative_url(
+ info.get('thumbnail')
+ or (self._og_search_thumbnail(webpage) if webpage else None),
+ 'http:'),
+ })
+ return info
diff --git a/hypervideo_dl/extractor/veehd.py b/hypervideo_dl/extractor/veehd.py
new file mode 100644
index 0000000..a6dc3c8
--- /dev/null
+++ b/hypervideo_dl/extractor/veehd.py
@@ -0,0 +1,118 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_unquote,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ get_element_by_id,
+)
+
+
+class VeeHDIE(InfoExtractor):
+ _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
+
+ # VeeHD videos seem to have multiple copies on several servers, all of
+ # which have different MD5 checksums, so the md5 field is omitted in all tests
+ _TESTS = [{
+ 'url': 'http://veehd.com/video/4639434_Solar-Sinter',
+ 'info_dict': {
+ 'id': '4639434',
+ 'ext': 'mp4',
+ 'title': 'Solar Sinter',
+ 'uploader_id': 'VideoEyes',
+ 'description': 'md5:46a840e8692ddbaffb5f81d9885cb457',
+ },
+ 'skip': 'Video deleted',
+ }, {
+ 'url': 'http://veehd.com/video/4905758_Elysian-Fields-Channeling',
+ 'info_dict': {
+ 'id': '4905758',
+ 'ext': 'mp4',
+ 'title': 'Elysian Fields - Channeling',
+ 'description': 'md5:360e4e95fdab58aefbea0f2a19e5604b',
+ 'uploader_id': 'spotted',
+ }
+ }, {
+ 'url': 'http://veehd.com/video/2046729_2012-2009-DivX-Trailer',
+ 'info_dict': {
+ 'id': '2046729',
+ 'ext': 'avi',
+ 'title': '2012 (2009) DivX Trailer',
+ 'description': 'md5:75435ee95255e6a9838ac6f6f3a2396b',
+ 'uploader_id': 'Movie_Trailers',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ # VeeHD seems to send garbage on the first request.
+ # See https://github.com/ytdl-org/youtube-dl/issues/2102
+ self._download_webpage(url, video_id, 'Requesting webpage')
+ webpage = self._download_webpage(url, video_id)
+
+ if 'This video has been removed<' in webpage:
+ raise ExtractorError('Video %s has been removed' % video_id, expected=True)
+
+ player_path = self._search_regex(
+ r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
+ webpage, 'player path')
+ player_url = compat_urlparse.urljoin(url, player_path)
+
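+ # the player page needs the same double-request workaround as the main page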
+ self._download_webpage(player_url, video_id, 'Requesting player page')
+ player_page = self._download_webpage(
+ player_url, video_id, 'Downloading player page')
+
+ video_url = None
+
+ config_json = self._search_regex(
+ r'value=\'config=({.+?})\'', player_page, 'config json', default=None)
+
+ if config_json:
+ config = json.loads(config_json)
+ video_url = compat_urllib_parse_unquote(config['clip']['url'])
+
+ if not video_url:
+ video_url = self._html_search_regex(
+ r'<embed[^>]+type="video/divx"[^>]+src="([^"]+)"',
+ player_page, 'video url', default=None)
+
+ if not video_url:
+ iframe_src = self._search_regex(
+ r'<iframe[^>]+src="/?([^"]+)"', player_page, 'iframe url')
+ iframe_url = 'http://veehd.com/%s' % iframe_src
+
+ self._download_webpage(iframe_url, video_id, 'Requesting iframe page')
+ iframe_page = self._download_webpage(
+ iframe_url, video_id, 'Downloading iframe page')
+
+ video_url = self._search_regex(
+ r"file\s*:\s*'([^']+)'", iframe_page, 'video url')
+
+ title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
+ uploader_id = self._html_search_regex(
+ r'<a href="/profile/\d+">(.+?)</a>',
+ webpage, 'uploader')
+ thumbnail = self._search_regex(
+ r'<img id="veehdpreview" src="(.+?)"',
+ webpage, 'thumbnail')
+ description = self._html_search_regex(
+ r'<td class="infodropdown".*?<div>(.*?)<ul',
+ webpage, 'description', flags=re.DOTALL)
+
+ return {
+ '_type': 'video',
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'uploader_id': uploader_id,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ }
diff --git a/hypervideo_dl/extractor/veoh.py b/hypervideo_dl/extractor/veoh.py
new file mode 100644
index 0000000..1c44c14
--- /dev/null
+++ b/hypervideo_dl/extractor/veoh.py
@@ -0,0 +1,103 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ qualities,
+)
+
+
+class VeohIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
+ 'md5': '9e7ecc0fd8bbee7a69fe38953aeebd30',
+ 'info_dict': {
+ 'id': 'v56314296nk7Zdmz3',
+ 'ext': 'mp4',
+ 'title': 'Straight Backs Are Stronger',
+ 'uploader': 'LUMOback',
+ 'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
+ },
+ }, {
+ 'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
+ 'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
+ 'info_dict': {
+ 'id': '27701988',
+ 'ext': 'mp4',
+ 'title': 'Chile workers cover up to avoid skin damage',
+ 'description': 'md5:2bd151625a60a32822873efc246ba20d',
+ 'uploader': 'afp-news',
+ 'duration': 123,
+ },
+ 'skip': 'This video has been deleted.',
+ }, {
+ 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
+ 'md5': '4fde7b9e33577bab2f2f8f260e30e979',
+ 'note': 'Embedded ooyala video',
+ 'info_dict': {
+ 'id': '69525809',
+ 'ext': 'mp4',
+ 'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
+ 'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
+ 'uploader': 'newsy-videos',
+ },
+ 'skip': 'This video has been deleted.',
+ }, {
+ 'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
+ 'only_matching': True,
+ }]
+
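+ # NOTE: legacy helper for an older API layout; it references
+ # self._extract_formats, which is not defined on this class, and is
+ # not called from _real_extract below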
+ def _extract_video(self, source):
+ return {
+ 'id': source.get('videoId'),
+ 'title': source.get('title'),
+ 'description': source.get('description'),
+ 'thumbnail': source.get('highResImage') or source.get('medResImage'),
+ 'uploader': source.get('username'),
+ 'duration': int_or_none(source.get('length')),
+ 'view_count': int_or_none(source.get('views')),
+ 'age_limit': 18 if source.get('isMature') == 'true' or source.get('isSexy') == 'true' else 0,
+ 'formats': self._extract_formats(source),
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video = self._download_json(
+ 'https://www.veoh.com/watch/getVideo/' + video_id,
+ video_id)['video']
+ title = video['title']
+
+ thumbnail_url = None
+ q = qualities(['HQ', 'Regular'])
+ formats = []
+ for f_id, f_url in video.get('src', {}).items():
+ if not f_url:
+ continue
+ if f_id == 'poster':
+ thumbnail_url = f_url
+ else:
+ formats.append({
+ 'format_id': f_id,
+ 'quality': q(f_id),
+ 'url': f_url,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': thumbnail_url,
+ 'uploader': video.get('author', {}).get('nickname'),
+ 'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
+ 'view_count': int_or_none(video.get('views')),
+ 'formats': formats,
+ 'average_rating': int_or_none(video.get('rating')),
+ 'comment_count': int_or_none(video.get('numOfComments')),
+ }
diff --git a/hypervideo_dl/extractor/vesti.py b/hypervideo_dl/extractor/vesti.py
new file mode 100644
index 0000000..5ab7168
--- /dev/null
+++ b/hypervideo_dl/extractor/vesti.py
@@ -0,0 +1,121 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+from .rutv import RUTVIE
+
+
+class VestiIE(InfoExtractor):
+ IE_DESC = 'Вести.Ru'
+ _VALID_URL = r'https?://(?:.+?\.)?vesti\.ru/(?P<id>.+)'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.vesti.ru/videos?vid=575582&cid=1',
+ 'info_dict': {
+ 'id': '765035',
+ 'ext': 'mp4',
+ 'title': 'Вести.net: биткоины в России не являются законными',
+ 'description': 'md5:d4bb3859dc1177b28a94c5014c35a36b',
+ 'duration': 302,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.vesti.ru/doc.html?id=1349233',
+ 'info_dict': {
+ 'id': '773865',
+ 'ext': 'mp4',
+ 'title': 'Участники митинга штурмуют Донецкую областную администрацию',
+ 'description': 'md5:1a160e98b3195379b4c849f2f4958009',
+ 'duration': 210,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.vesti.ru/only_video.html?vid=576180',
+ 'info_dict': {
+ 'id': '766048',
+ 'ext': 'mp4',
+ 'title': 'США заморозило, Британию затопило',
+ 'description': 'md5:f0ed0695ec05aed27c56a70a58dc4cc1',
+ 'duration': 87,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://hitech.vesti.ru/news/view/id/4000',
+ 'info_dict': {
+ 'id': '766888',
+ 'ext': 'mp4',
+ 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
+ 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
+ 'duration': 279,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
+ 'info_dict': {
+ 'id': '766403',
+ 'ext': 'mp4',
+ 'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
+ 'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
+ 'duration': 271,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Blocked outside Russia',
+ },
+ {
+ 'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
+ 'info_dict': {
+ 'id': '51499',
+ 'ext': 'flv',
+ 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
+ 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Broadcast has finished'
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ page = self._download_webpage(url, video_id, 'Downloading page')
+
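+ # pages that embed the legacy Flash player expose the real video ID in
+ # an og:video meta tag; resolve it to the canonical only_video.html page
+ # before handing off to the RUTV extractor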
+ mobj = re.search(
+ r'<meta[^>]+?property="og:video"[^>]+?content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)',
+ page)
+ if mobj:
+ video_id = mobj.group('id')
+ page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
+ 'Downloading video page')
+
+ rutv_url = RUTVIE._extract_url(page)
+ if rutv_url:
+ return self.url_result(rutv_url, 'RUTV')
+
+ raise ExtractorError('No video found', expected=True)
diff --git a/hypervideo_dl/extractor/vevo.py b/hypervideo_dl/extractor/vevo.py
new file mode 100644
index 0000000..4ea9f1b
--- /dev/null
+++ b/hypervideo_dl/extractor/vevo.py
@@ -0,0 +1,374 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+ compat_HTTPError,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class VevoBaseIE(InfoExtractor):
+ def _extract_json(self, webpage, video_id):
+ return self._parse_json(
+ self._search_regex(
+ r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
+ webpage, 'initial store'),
+ video_id)
+
+
+class VevoIE(VevoBaseIE):
+ '''
+ Accepts urls from vevo.com or in the format 'vevo:{id}'
+ (currently used by MTVIE and MySpaceIE)
+ '''
+ _VALID_URL = r'''(?x)
+ (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
+ https?://cache\.vevo\.com/m/html/embed\.html\?video=|
+ https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
+ https?://embed\.vevo\.com/.*?[?&]isrc=|
+ vevo:)
+ (?P<id>[^&?#]+)'''
+
+ _TESTS = [{
+ 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
+ 'md5': '95ee28ee45e70130e3ab02b0f579ae23',
+ 'info_dict': {
+ 'id': 'GB1101300280',
+ 'ext': 'mp4',
+ 'title': 'Hurts - Somebody to Die For',
+ 'timestamp': 1372057200,
+ 'upload_date': '20130624',
+ 'uploader': 'Hurts',
+ 'track': 'Somebody to Die For',
+ 'artist': 'Hurts',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'v3 SMIL format',
+ 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
+ 'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
+ 'info_dict': {
+ 'id': 'USUV71302923',
+ 'ext': 'mp4',
+ 'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
+ 'timestamp': 1392796919,
+ 'upload_date': '20140219',
+ 'uploader': 'Cassadee Pope',
+ 'track': 'I Wish I Could Break Your Heart',
+ 'artist': 'Cassadee Pope',
+ 'genre': 'Country',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Age-limited video',
+ 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
+ 'info_dict': {
+ 'id': 'USRV81300282',
+ 'ext': 'mp4',
+ 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
+ 'age_limit': 18,
+ 'timestamp': 1372888800,
+ 'upload_date': '20130703',
+ 'uploader': 'Justin Timberlake',
+ 'track': 'Tunnel Vision (Explicit)',
+ 'artist': 'Justin Timberlake',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'No video_info',
+ 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
+ 'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
+ 'info_dict': {
+ 'id': 'USUV71503000',
+ 'ext': 'mp4',
+ 'title': 'K Camp ft. T.I. - Till I Die',
+ 'age_limit': 18,
+ 'timestamp': 1449468000,
+ 'upload_date': '20151207',
+ 'uploader': 'K Camp',
+ 'track': 'Till I Die',
+ 'artist': 'K Camp',
+ 'genre': 'Hip-Hop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Featured test',
+ 'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
+ 'md5': 'd28675e5e8805035d949dc5cf161071d',
+ 'info_dict': {
+ 'id': 'USUV71402190',
+ 'ext': 'mp4',
+ 'title': 'Lemaitre ft. LoLo - Wait',
+ 'age_limit': 0,
+ 'timestamp': 1413432000,
+ 'upload_date': '20141016',
+ 'uploader': 'Lemaitre',
+ 'track': 'Wait',
+ 'artist': 'Lemaitre',
+ 'genre': 'Electronic',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Only available via webpage',
+ 'url': 'http://www.vevo.com/watch/GBUV71600656',
+ 'md5': '67e79210613865b66a47c33baa5e37fe',
+ 'info_dict': {
+ 'id': 'GBUV71600656',
+ 'ext': 'mp4',
+ 'title': 'ABC - Viva Love',
+ 'age_limit': 0,
+ 'timestamp': 1461830400,
+ 'upload_date': '20160428',
+ 'uploader': 'ABC',
+ 'track': 'Viva Love',
+ 'artist': 'ABC',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Failed to download video versions info'],
+ }, {
+ # no genres available
+ 'url': 'http://www.vevo.com/watch/INS171400764',
+ 'only_matching': True,
+ }, {
+ # Another case available only via the webpage; using streams/streamsV3 formats
+ # Geo-restricted to Netherlands/Germany
+ 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=',
+ 'only_matching': True,
+ }]
+ _VERSIONS = {
+ 0: 'youtube', # only in AuthenticateVideo videoVersions
+ 1: 'level3',
+ 2: 'akamai',
+ 3: 'level3',
+ 4: 'amazon',
+ }
+
+ def _initialize_api(self, video_id):
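+ # fetch an anonymous OAuth token; its 'legacy_token' is appended to every API URL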
+ webpage = self._download_webpage(
+ 'https://accounts.vevo.com/token', None,
+ note='Retrieving oauth token',
+ errnote='Unable to retrieve oauth token',
+ data=json.dumps({
+ 'client_id': 'SPupX1tvqFEopQ1YS6SS',
+ 'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous',
+ }).encode('utf-8'),
+ headers={
+ 'Content-Type': 'application/json',
+ })
+
+ if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage):
+ self.raise_geo_restricted(
+ '%s said: This page is currently unavailable in your region' % self.IE_NAME)
+
+ auth_info = self._parse_json(webpage, video_id)
+ self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['legacy_token']
+
+ def _call_api(self, path, *args, **kwargs):
+ try:
+ data = self._download_json(self._api_url_template % path, *args, **kwargs)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ errors = self._parse_json(e.cause.read().decode(), None)['errors']
+ error_message = ', '.join([error['message'] for error in errors])
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
+ raise
+ return data
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ self._initialize_api(video_id)
+
+ video_info = self._call_api(
+ 'video/%s' % video_id, video_id, 'Downloading api video info',
+ 'Failed to download video info')
+
+ video_versions = self._call_api(
+ 'video/%s/streams' % video_id, video_id,
+ 'Downloading video versions info',
+ 'Failed to download video versions info',
+ fatal=False)
+
+ # Some videos are only available via webpage (e.g.
+ # https://github.com/ytdl-org/youtube-dl/issues/9366)
+ if not video_versions:
+ webpage = self._download_webpage(url, video_id)
+ json_data = self._extract_json(webpage, video_id)
+ if 'streams' in json_data.get('default', {}):
+ video_versions = json_data['default']['streams'][video_id][0]
+ else:
+ video_versions = [
+ value
+ for key, value in json_data['apollo']['data'].items()
+ if key.startswith('%s.streams' % video_id)]
+
+ uploader = None
+ artist = None
+ featured_artist = None
+ artists = video_info.get('artists') or []  # the field may be absent
+ for curr_artist in artists:
+ if curr_artist.get('role') == 'Featured':
+ featured_artist = curr_artist['name']
+ else:
+ artist = uploader = curr_artist['name']
+
+ formats = []
+ for video_version in video_versions:
+ version = self._VERSIONS.get(video_version.get('version'), 'generic')
+ version_url = video_version.get('url')
+ if not version_url:
+ continue
+
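+ # Smooth Streaming (.ism) manifests are not handled; skip them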
+ if '.ism' in version_url:
+ continue
+ elif '.mpd' in version_url:
+ formats.extend(self._extract_mpd_formats(
+ version_url, video_id, mpd_id='dash-%s' % version,
+ note='Downloading %s MPD information' % version,
+ errnote='Failed to download %s MPD information' % version,
+ fatal=False))
+ elif '.m3u8' in version_url:
+ formats.extend(self._extract_m3u8_formats(
+ version_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls-%s' % version,
+ note='Downloading %s m3u8 information' % version,
+ errnote='Failed to download %s m3u8 information' % version,
+ fatal=False))
+ else:
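+ # progressive HTTP URLs encode the stream metadata in the file name,
+ # e.g. a (hypothetical) ..._1280x720_h264_2000_aac_128.mp4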
+ m = re.search(r'''(?xi)
+ _(?P<width>[0-9]+)x(?P<height>[0-9]+)
+ _(?P<vcodec>[a-z0-9]+)
+ _(?P<vbr>[0-9]+)
+ _(?P<acodec>[a-z0-9]+)
+ _(?P<abr>[0-9]+)
+ \.(?P<ext>[a-z0-9]+)''', version_url)
+ if not m:
+ continue
+
+ formats.append({
+ 'url': version_url,
+ 'format_id': 'http-%s-%s' % (version, video_version['quality']),
+ 'vcodec': m.group('vcodec'),
+ 'acodec': m.group('acodec'),
+ 'vbr': int(m.group('vbr')),
+ 'abr': int(m.group('abr')),
+ 'ext': m.group('ext'),
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+ self._sort_formats(formats)
+
+ track = video_info['title']
+ if featured_artist:
+ artist = '%s ft. %s' % (artist, featured_artist)
+ title = '%s - %s' % (artist, track) if artist else track
+
+ genres = video_info.get('genres')
+ genre = (
+ genres[0] if genres and isinstance(genres, list)
+ and isinstance(genres[0], compat_str) else None)
+
+ is_explicit = video_info.get('isExplicit')
+ if is_explicit is True:
+ age_limit = 18
+ elif is_explicit is False:
+ age_limit = 0
+ else:
+ age_limit = None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'),
+ 'timestamp': parse_iso8601(video_info.get('releaseDate')),
+ 'uploader': uploader,
+ 'duration': int_or_none(video_info.get('duration')),
+ 'view_count': int_or_none(video_info.get('views', {}).get('total')),
+ 'age_limit': age_limit,
+ 'track': track,
+ 'artist': uploader,
+ 'genre': genre,
+ }
+
+
+class VevoPlaylistIE(VevoBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29',
+ 'info_dict': {
+ 'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29',
+ 'title': 'Best-Of: Birdman',
+ },
+ 'playlist_count': 10,
+ }, {
+ 'url': 'http://www.vevo.com/watch/genre/rock',
+ 'info_dict': {
+ 'id': 'rock',
+ 'title': 'Rock',
+ },
+ 'playlist_count': 20,
+ }, {
+ 'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0',
+ 'md5': '32dcdfddddf9ec6917fc88ca26d36282',
+ 'info_dict': {
+ 'id': 'USCMV1100073',
+ 'ext': 'mp4',
+ 'title': 'Birdman - Y.U. MAD',
+ 'timestamp': 1323417600,
+ 'upload_date': '20111209',
+ 'uploader': 'Birdman',
+ 'track': 'Y.U. MAD',
+ 'artist': 'Birdman',
+ 'genre': 'Rap/Hip-Hop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file'],
+ }, {
+ 'url': 'http://www.vevo.com/watch/genre/rock?index=0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ playlist_id = mobj.group('id')
+ playlist_kind = mobj.group('kind')
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ index = qs.get('index', [None])[0]
+
+ if index:
+ video_id = self._search_regex(
+ r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
+ webpage, 'video id', default=None, group='id')
+ if video_id:
+ return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())
+
+ playlists = self._extract_json(webpage, playlist_id)['default']['%ss' % playlist_kind]
+
+ playlist = (list(playlists.values())[0]
+ if playlist_kind == 'playlist' else playlists[playlist_id])
+
+ entries = [
+ self.url_result('vevo:%s' % src, VevoIE.ie_key())
+ for src in playlist['isrcs']]
+
+ return self.playlist_result(
+ entries, playlist.get('playlistId') or playlist_id,
+ playlist.get('name'), playlist.get('description'))
diff --git a/hypervideo_dl/extractor/vgtv.py b/hypervideo_dl/extractor/vgtv.py
new file mode 100644
index 0000000..22e99e8
--- /dev/null
+++ b/hypervideo_dl/extractor/vgtv.py
@@ -0,0 +1,313 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .xstream import XstreamIE
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ try_get,
+)
+
+
+class VGTVIE(XstreamIE):
+ IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
+ _GEO_BYPASS = False
+
+ _HOST_TO_APPNAME = {
+ 'vgtv.no': 'vgtv',
+ 'bt.no/tv': 'bttv',
+ 'aftenbladet.no/tv': 'satv',
+ 'fvn.no/fvntv': 'fvntv',
+ 'aftenposten.no/webtv': 'aptv',
+ 'ap.vgtv.no/webtv': 'aptv',
+ 'tv.aftonbladet.se': 'abtv',
+ # obsolete URL schemas, kept in order to save one HTTP redirect
+ 'tv.aftonbladet.se/abtv': 'abtv',
+ 'www.aftonbladet.se/tv': 'abtv',
+ }
+
+ _APP_NAME_TO_VENDOR = {
+ 'vgtv': 'vgtv',
+ 'bttv': 'bt',
+ 'satv': 'sa',
+ 'fvntv': 'fvn',
+ 'aptv': 'ap',
+ 'abtv': 'ab',
+ }
+
+ _VALID_URL = r'''(?x)
+ (?:https?://(?:www\.)?
+ (?P<host>
+ %s
+ )
+ /?
+ (?:
+ (?:\#!/)?(?:video|live)/|
+ embed?.*id=|
+ a(?:rticles)?/
+ )|
+ (?P<appname>
+ %s
+ ):)
+ (?P<id>\d+)
+ ''' % ('|'.join(_HOST_TO_APPNAME.keys()), '|'.join(_APP_NAME_TO_VENDOR.keys()))
+
+ _TESTS = [
+ {
+ # streamType: vod
+ 'url': 'http://www.vgtv.no/#!/video/84196/hevnen-er-soet-episode-10-abu',
+ 'md5': 'b8be7a234cebb840c0d512c78013e02f',
+ 'info_dict': {
+ 'id': '84196',
+ 'ext': 'mp4',
+ 'title': 'Hevnen er søt: Episode 10 - Abu',
+ 'description': 'md5:e25e4badb5f544b04341e14abdc72234',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 648.000,
+ 'timestamp': 1404626400,
+ 'upload_date': '20140706',
+ 'view_count': int,
+ },
+ },
+ {
+ # streamType: wasLive
+ 'url': 'http://www.vgtv.no/#!/live/100764/opptak-vgtv-foelger-em-kvalifiseringen',
+ 'info_dict': {
+ 'id': '100764',
+ 'ext': 'flv',
+ 'title': 'OPPTAK: VGTV følger EM-kvalifiseringen',
+ 'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 9103.0,
+ 'timestamp': 1410113864,
+ 'upload_date': '20140907',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Video is no longer available',
+ },
+ {
+ # streamType: wasLive
+ 'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
+ 'info_dict': {
+ 'id': '113063',
+ 'ext': 'mp4',
+ 'title': 'V75 fra Solvalla 30.05.15',
+ 'description': 'md5:b3743425765355855f88e096acc93231',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 25966,
+ 'timestamp': 1432975582,
+ 'upload_date': '20150530',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
+ 'md5': 'fd828cd29774a729bf4d4425fe192972',
+ 'info_dict': {
+ 'id': '21039',
+ 'ext': 'mp4',
+ 'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
+ 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
+ 'duration': 66,
+ 'timestamp': 1417002452,
+ 'upload_date': '20141126',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil',
+ 'only_matching': True,
+ },
+ {
+ # geoblocked
+ 'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://tv.aftonbladet.se/video/36015/vulkanutbrott-i-rymden-nu-slapper-nasa-bilderna',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.aftonbladet.se/tv/a/36015',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'abtv:140026',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.vgtv.no/video/84196/hevnen-er-soet-episode-10-abu',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ host = mobj.group('host')
+ appname = self._HOST_TO_APPNAME[host] if host else mobj.group('appname')
+ vendor = self._APP_NAME_TO_VENDOR[appname]
+
+ data = self._download_json(
+ 'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website'
+ % (vendor, video_id, appname),
+ video_id, 'Downloading media JSON')
+
+ if data.get('status') == 'inactive':
+ raise ExtractorError(
+ 'Video %s is no longer available' % video_id, expected=True)
+
+ info = {
+ 'formats': [],
+ }
+ if len(video_id) == 5:
+ if appname == 'bttv':
+ info = self._extract_video_info('btno', video_id)
+
+ streams = data['streamUrls']
+ stream_type = data.get('streamType')
+ is_live = stream_type == 'live'
+ formats = []
+
+ hls_url = streams.get('hls')
+ if hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4',
+ entry_protocol='m3u8' if is_live else 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ hds_url = streams.get('hds')
+ if hds_url:
+ hdcore_sign = 'hdcore=3.7.0'
+ f4m_formats = self._extract_f4m_formats(
+ hds_url + '?%s' % hdcore_sign, video_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ for entry in f4m_formats:
+ # URLs without the extra param induce a 404 error
+ entry.update({'extra_param_to_segment_url': hdcore_sign})
+ formats.append(entry)
+
+ mp4_urls = streams.get('pseudostreaming') or []
+ mp4_url = streams.get('mp4')
+ if mp4_url:
+ mp4_urls.append(mp4_url)
+ for mp4_url in mp4_urls:
+ format_info = {
+ 'url': mp4_url,
+ }
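+ # pseudostreaming URLs embed width, height and bitrate in the file name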
+ mobj = re.search(r'(\d+)_(\d+)_(\d+)', mp4_url)
+ if mobj:
+ tbr = int(mobj.group(3))
+ format_info.update({
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ 'tbr': tbr,
+ 'format_id': 'mp4-%s' % tbr,
+ })
+ formats.append(format_info)
+
+ info['formats'].extend(formats)
+
+ if not info['formats']:
+ properties = try_get(
+ data, lambda x: x['streamConfiguration']['properties'], list)
+ if properties and 'geoblocked' in properties:
+ # raise_geo_restricted() raises internally; no outer 'raise' is needed
+ self.raise_geo_restricted(
+ countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
+
+ self._sort_formats(info['formats'])
+
+ info.update({
+ 'id': video_id,
+ 'title': self._live_title(data['title']) if is_live else data['title'],
+ 'description': data['description'],
+ 'thumbnail': data['images']['main'] + '?t[]=900x506q80',
+ 'timestamp': data['published'],
+ 'duration': float_or_none(data['duration'], 1000),
+ 'view_count': data['displays'],
+ 'is_live': is_live,
+ })
+ return info
+
+
+class BTArticleIE(InfoExtractor):
+ IE_NAME = 'bt:article'
+ IE_DESC = 'Bergens Tidende Articles'
+ _VALID_URL = r'https?://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
+ _TEST = {
+ 'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
+ 'md5': '2acbe8ad129b3469d5ae51b1158878df',
+ 'info_dict': {
+ 'id': '23199',
+ 'ext': 'mp4',
+ 'title': 'Alrekstad internat',
+ 'description': 'md5:dc81a9056c874fedb62fc48a300dac58',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 191,
+ 'timestamp': 1289991323,
+ 'upload_date': '20101117',
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, self._match_id(url))
+ video_id = self._search_regex(
+ r'<video[^>]+data-id="(\d+)"', webpage, 'video id')
+ return self.url_result('bttv:%s' % video_id, 'VGTV')
+
+
+class BTVestlendingenIE(InfoExtractor):
+ IE_NAME = 'bt:vestlendingen'
+ IE_DESC = 'Bergens Tidende - Vestlendingen'
+ _VALID_URL = r'https?://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
+ 'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
+ 'info_dict': {
+ 'id': '86588',
+ 'ext': 'mov',
+ 'title': 'Otto Wollertsen',
+ 'description': 'Vestlendingen Otto Fredrik Wollertsen',
+ 'timestamp': 1430473209,
+ 'upload_date': '20150501',
+ },
+ 'skip': '404 Error',
+ }, {
+ 'url': 'http://www.bt.no/spesial/vestlendingen/#!/86255',
+ 'md5': 'a2893f8632e96389f4bdf36aa9463ceb',
+ 'info_dict': {
+ 'id': '86255',
+ 'ext': 'mov',
+ 'title': 'Du må tåle å fryse og være sulten',
+ 'description': 'md5:b8046f4d022d5830ddab04865791d063',
+ 'upload_date': '20150321',
+ 'timestamp': 1426942023,
+ },
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result('bttv:%s' % self._match_id(url), 'VGTV')
diff --git a/hypervideo_dl/extractor/vh1.py b/hypervideo_dl/extractor/vh1.py
new file mode 100644
index 0000000..dff94a2
--- /dev/null
+++ b/hypervideo_dl/extractor/vh1.py
@@ -0,0 +1,41 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .mtv import MTVServicesInfoExtractor
+
+
+class VH1IE(MTVServicesInfoExtractor):
+ IE_NAME = 'vh1.com'
+ _FEED_URL = 'http://www.vh1.com/feeds/mrss/'
+ _TESTS = [{
+ 'url': 'http://www.vh1.com/episodes/0umwpq/hip-hop-squares-kent-jones-vs-nick-young-season-1-ep-120',
+ 'info_dict': {
+ 'title': 'Kent Jones vs. Nick Young',
+ 'description': 'Come to Play. Stay to Party. With Mike Epps, TIP, O’Shea Jackson Jr., T-Pain, Tisha Campbell-Martin and more.',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ # Clip
+ 'url': 'http://www.vh1.com/video-clips/t74mif/scared-famous-scared-famous-extended-preview',
+ 'info_dict': {
+ 'id': '0a50c2d2-a86b-4141-9565-911c7e2d0b92',
+ 'ext': 'mp4',
+ 'title': 'Scared Famous|October 9, 2017|1|NO-EPISODE#|Scared Famous + Extended Preview',
+ 'description': 'md5:eff5551a274c473a29463de40f7b09da',
+ 'upload_date': '20171009',
+ 'timestamp': 1507574700,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+
+ _VALID_URL = r'https?://(?:www\.)?vh1\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+ mgid = self._extract_triforce_mgid(webpage)
+ videos_info = self._get_videos_info(mgid)
+ return videos_info
diff --git a/hypervideo_dl/extractor/vice.py b/hypervideo_dl/extractor/vice.py
new file mode 100644
index 0000000..e374995
--- /dev/null
+++ b/hypervideo_dl/extractor/vice.py
@@ -0,0 +1,337 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import functools
+import hashlib
+import json
+import random
+import re
+import time
+
+from .adobepass import AdobePassIE
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ OnDemandPagedList,
+ parse_age_limit,
+ str_or_none,
+ try_get,
+)
+
+
+class ViceBaseIE(InfoExtractor):
+ def _call_api(self, resource, resource_key, resource_id, locale, fields, args=''):
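+ # minimal GraphQL wrapper: interpolates the resource name, locale,
+ # lookup key/value and requested fields into a query document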
+ return self._download_json(
+ 'https://video.vice.com/api/v1/graphql', resource_id, query={
+ 'query': '''{
+ %s(locale: "%s", %s: "%s"%s) {
+ %s
+ }
+}''' % (resource, locale, resource_key, resource_id, args, fields),
+ })['data'][resource]
+
+
+class ViceIE(ViceBaseIE, AdobePassIE):
+ IE_NAME = 'vice'
+ _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]{24})'
+ _TESTS = [{
+ 'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
+ 'info_dict': {
+ 'id': '58c69e38a55424f1227dc3f7',
+ 'ext': 'mp4',
+ 'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
+ 'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1489664942,
+ 'upload_date': '20170316',
+ 'age_limit': 14,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # geo restricted to US
+ 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
+ 'info_dict': {
+ 'id': '5816510690b70e6c5fd39a56',
+ 'ext': 'mp4',
+ 'uploader': 'vice',
+ 'title': 'The Signal From Tölva',
+ 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1477941983,
+ 'upload_date': '20161031',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
+ 'info_dict': {
+ 'id': '581b12b60a0e1f4c0fb6ea2f',
+ 'ext': 'mp4',
+ 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
+ 'description': 'Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1485368119,
+ 'upload_date': '20170125',
+ 'age_limit': 14,
+ },
+ 'params': {
+ # AES-encrypted m3u8
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})',
+ webpage)
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = ViceIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ def _real_extract(self, url):
+ locale, video_id = re.match(self._VALID_URL, url).groups()
+
+ video = self._call_api('videos', 'id', video_id, locale, '''body
+ locked
+ rating
+ thumbnail_url
+ title''')[0]
+ title = video['title'].strip()
+ rating = video.get('rating')
+
+ query = {}
+ if video.get('locked'):
+ resource = self._get_mvpd_resource(
+ 'VICELAND', title, video_id, rating)
+ query['tvetoken'] = self._extract_mvpd_auth(
+ url, video_id, 'VICELAND', resource)
+
+ # signature generation algorithm is reverse engineered from signatureGenerator in
+ # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
+ # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
+ # new JS is located here https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js
+ exp = int(time.time()) + 1440
+
+ query.update({
+ 'exp': exp,
+ 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
+ 'skipadstitching': 1,
+ 'platform': 'desktop',
+ 'rn': random.randint(10000, 100000),
+ })
+
+ try:
+ preplay = self._download_json(
+ 'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id),
+ video_id, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):
+ error = json.loads(e.cause.read().decode())
+ error_message = error.get('error_description') or error['details']
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, error_message), expected=True)
+ raise
+
+ video_data = preplay['video']
+ formats = self._extract_m3u8_formats(
+ preplay['playURL'], video_id, 'mp4', 'm3u8_native')
+ self._sort_formats(formats)
+ episode = video_data.get('episode') or {}
+ channel = video_data.get('channel') or {}
+ season = video_data.get('season') or {}
+
+ subtitles = {}
+ for subtitle in preplay.get('subtitleURLs', []):
+ cc_url = subtitle.get('url')
+ if not cc_url:
+ continue
+ language_code = try_get(subtitle, lambda x: x['languages'][0]['language_code'], compat_str) or 'en'
+ subtitles.setdefault(language_code, []).append({
+ 'url': cc_url,
+ })
+
+ return {
+ 'formats': formats,
+ 'id': video_id,
+ 'title': title,
+ 'description': clean_html(video.get('body')),
+ 'thumbnail': video.get('thumbnail_url'),
+ 'duration': int_or_none(video_data.get('video_duration')),
+ 'timestamp': int_or_none(video_data.get('created_at'), 1000),
+ 'age_limit': parse_age_limit(video_data.get('video_rating') or rating),
+ 'series': try_get(video_data, lambda x: x['show']['base']['display_title'], compat_str),
+ 'episode_number': int_or_none(episode.get('episode_number')),
+ 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
+ 'season_number': int_or_none(season.get('season_number')),
+ 'season_id': str_or_none(season.get('id') or video_data.get('season_id')),
+ 'uploader': channel.get('name'),
+ 'uploader_id': str_or_none(channel.get('id')),
+ 'subtitles': subtitles,
+ }
+
+
+class ViceShowIE(ViceBaseIE):
+ IE_NAME = 'vice:show'
+ _VALID_URL = r'https?://(?:video\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/show/(?P<id>[^/?#&]+)'
+ _PAGE_SIZE = 25
+ _TESTS = [{
+ 'url': 'https://video.vice.com/en_us/show/fck-thats-delicious',
+ 'info_dict': {
+ 'id': '57a2040c8cb727dec794c901',
+ 'title': 'F*ck, That’s Delicious',
+ 'description': 'The life and eating habits of rap’s greatest bon vivant, Action Bronson.',
+ },
+ 'playlist_mincount': 64,
+ }, {
+ 'url': 'https://www.vicetv.com/en_us/show/fck-thats-delicious',
+ 'only_matching': True,
+ }]
+
+ def _fetch_page(self, locale, show_id, page):
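+ # OnDemandPagedList passes 0-based page numbers; the API expects 1-based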
+ videos = self._call_api('videos', 'show_id', show_id, locale, '''body
+ id
+ url''', ', page: %d, per_page: %d' % (page + 1, self._PAGE_SIZE))
+ for video in videos:
+ yield self.url_result(
+ video['url'], ViceIE.ie_key(), video.get('id'))
+
+ def _real_extract(self, url):
+ locale, display_id = re.match(self._VALID_URL, url).groups()
+ show = self._call_api('shows', 'slug', display_id, locale, '''dek
+ id
+ title''')[0]
+ show_id = show['id']
+
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, locale, show_id),
+ self._PAGE_SIZE)
+
+ return self.playlist_result(
+ entries, show_id, show.get('title'), show.get('dek'))
+
+
+class ViceArticleIE(ViceBaseIE):
+ IE_NAME = 'vice:article'
+ _VALID_URL = r'https://(?:www\.)?vice\.com/(?P<locale>[^/]+)/article/(?:[0-9a-z]{6}/)?(?P<id>[^?#]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
+ 'info_dict': {
+ 'id': '58dc0a3dee202d2a0ccfcbd8',
+ 'ext': 'mp4',
+ 'title': 'Mormon War on Porn',
+ 'description': 'md5:1c5d91fe25fa8aa304f9def118b92dbf',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1491883129,
+ 'upload_date': '20170411',
+ 'age_limit': 17,
+ },
+ 'params': {
+ # AES-encrypted m3u8
+ 'skip_download': True,
+ },
+ 'add_ie': [ViceIE.ie_key()],
+ }, {
+ 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
+ 'md5': '13010ee0bc694ea87ec40724397c2349',
+ 'info_dict': {
+ 'id': '3jstaBeXgAs',
+ 'ext': 'mp4',
+ 'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
+ 'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
+ 'uploader': 'Motherboard',
+ 'uploader_id': 'MotherboardTV',
+ 'upload_date': '20140529',
+ },
+ 'add_ie': [YoutubeIE.ie_key()],
+ }, {
+ 'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
+ 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
+ 'info_dict': {
+ 'id': '57f41d3556a0a80f54726060',
+ 'ext': 'mp4',
+ 'title': "Making The World's First Male Sex Doll",
+ 'description': 'md5:19b00b215b99961cf869c40fbe9df755',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1476919911,
+ 'upload_date': '20161019',
+ 'age_limit': 17,
+ },
+ 'params': {
+ 'skip_download': True,
+ 'format': 'bestvideo',
+ },
+ 'add_ie': [ViceIE.ie_key()],
+ }, {
+ 'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ locale, display_id = re.match(self._VALID_URL, url).groups()
+
+ article = self._call_api('articles', 'slug', display_id, locale, '''body
+ embed_code''')[0]
+ body = article['body']
+
+ def _url_res(video_url, ie_key):
+ return {
+ '_type': 'url_transparent',
+ 'url': video_url,
+ 'display_id': display_id,
+ 'ie_key': ie_key,
+ }
+
+ vice_url = ViceIE._extract_url(body)
+ if vice_url:
+ return _url_res(vice_url, ViceIE.ie_key())
+
+ embed_code = self._search_regex(
+ r'embedCode=([^&\'"]+)', body,
+ 'ooyala embed code', default=None)
+ if embed_code:
+ return _url_res('ooyala:%s' % embed_code, 'Ooyala')
+
+ youtube_url = YoutubeIE._extract_url(body)
+ if youtube_url:
+ return _url_res(youtube_url, YoutubeIE.ie_key())
+
+ video_url = self._html_search_regex(
+ r'data-video-url="([^"]+)"',
+ article['embed_code'], 'video URL')
+
+ return _url_res(video_url, ViceIE.ie_key())
diff --git a/hypervideo_dl/extractor/vidbit.py b/hypervideo_dl/extractor/vidbit.py
new file mode 100644
index 0000000..91f45b7
--- /dev/null
+++ b/hypervideo_dl/extractor/vidbit.py
@@ -0,0 +1,84 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ remove_end,
+ unified_strdate,
+)
+
+
+class VidbitIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vidbit\.co/(?:watch|embed)\?.*?\bv=(?P<id>[\da-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'http://www.vidbit.co/watch?v=jkL2yDOEq2',
+ 'md5': '1a34b7f14defe3b8fafca9796892924d',
+ 'info_dict': {
+ 'id': 'jkL2yDOEq2',
+ 'ext': 'mp4',
+ 'title': 'Intro to VidBit',
+ 'description': 'md5:5e0d6142eec00b766cbf114bfd3d16b7',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'upload_date': '20160618',
+ 'view_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ 'url': 'http://www.vidbit.co/embed?v=jkL2yDOEq2&auto=0&water=0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ compat_urlparse.urljoin(url, '/watch?v=%s' % video_id), video_id)
+
+ video_url = title = None
+
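+ # Prefer the jwplayer-style .setup({...}) config; fall back to scraping
+ # the page markup below when it is absent or incomplete.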
+ config = self._parse_json(self._search_regex(
+ r'(?s)\.setup\(({.+?})\);', webpage, 'setup', default='{}'),
+ video_id, transform_source=js_to_json)
+ if config:
+ if config.get('file'):
+ video_url = compat_urlparse.urljoin(url, config['file'])
+ title = config.get('title')
+
+ if not video_url:
+ video_url = compat_urlparse.urljoin(url, self._search_regex(
+ r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'video URL', group='url'))
+
+ if not title:
+ title = remove_end(
+ self._html_search_regex(
+ (r'<h1>(.+?)</h1>', r'<title>(.+?)</title>'),
+ webpage, 'title', default=None) or self._og_search_title(webpage),
+ ' - VidBit')
+
+ description = self._html_search_meta(
+ ('description', 'og:description', 'twitter:description'),
+ webpage, 'description')
+
+ upload_date = unified_strdate(self._html_search_meta(
+ 'datePublished', webpage, 'upload date'))
+
+ view_count = int_or_none(self._search_regex(
+ r'<strong>(\d+)</strong> views',
+ webpage, 'view count', fatal=False))
+ comment_count = int_or_none(self._search_regex(
+ r'id=["\']cmt_num["\'][^>]*>\((\d+)\)',
+ webpage, 'comment count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'upload_date': upload_date,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ }
diff --git a/hypervideo_dl/extractor/viddler.py b/hypervideo_dl/extractor/viddler.py
new file mode 100644
index 0000000..6423584
--- /dev/null
+++ b/hypervideo_dl/extractor/viddler.py
@@ -0,0 +1,138 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+)
+
+
+class ViddlerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)(?:.+?\bsecret=(\d+))?'
+ _TESTS = [{
+ 'url': 'http://www.viddler.com/v/43903784',
+ 'md5': '9eee21161d2c7f5b39690c3e325fab2f',
+ 'info_dict': {
+ 'id': '43903784',
+ 'ext': 'mov',
+ 'title': 'Video Made Easy',
+ 'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
+ 'uploader': 'viddler',
+ 'timestamp': 1335371429,
+ 'upload_date': '20120425',
+ 'duration': 100.89,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'view_count': int,
+ 'comment_count': int,
+ 'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'],
+ }
+ }, {
+ 'url': 'http://www.viddler.com/v/4d03aad9/',
+ 'md5': 'f12c5a7fa839c47a79363bfdf69404fb',
+ 'info_dict': {
+ 'id': '4d03aad9',
+ 'ext': 'ts',
+ 'title': 'WALL-TO-GORTAT',
+ 'upload_date': '20150126',
+ 'uploader': 'deadspin',
+ 'timestamp': 1422285291,
+ 'view_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ 'url': 'http://www.viddler.com/player/221ebbbd/0/',
+ 'md5': '740511f61d3d1bb71dc14a0fe01a1c10',
+ 'info_dict': {
+ 'id': '221ebbbd',
+ 'ext': 'mov',
+ 'title': 'LETeens-Grammar-snack-third-conditional',
+ 'description': ' ',
+ 'upload_date': '20140929',
+ 'uploader': 'BCLETeens',
+ 'timestamp': 1411997190,
+ 'view_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ # secret protected
+ 'url': 'http://www.viddler.com/v/890c0985?secret=34051570',
+ 'info_dict': {
+ 'id': '890c0985',
+ 'ext': 'mp4',
+ 'title': 'Complete Property Training - Traineeships',
+ 'description': ' ',
+ 'upload_date': '20130606',
+ 'uploader': 'TiffanyBowtell',
+ 'timestamp': 1370496993,
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id, secret = re.match(self._VALID_URL, url).groups()
+
+ query = {
+ 'video_id': video_id,
+ 'key': 'v0vhrt7bg2xq1vyxhkct',
+ }
+ if secret:
+ query['secret'] = secret
+
+ data = self._download_json(
+ 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json',
+ video_id, headers={'Referer': url}, query=query)['video']
+
+ formats = []
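+ # Each ready file may yield up to three variants of the same rendition
+ # (direct, CDN, HTML5), ranked via source_preference: CDN > HTML5 > direct.
+ # 'filed' presumably avoids shadowing the Python 2 builtin 'file'.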
+ for filed in data['files']:
+ if filed.get('status', 'ready') != 'ready':
+ continue
+ format_id = filed.get('profile_id') or filed['profile_name']
+ f = {
+ 'format_id': format_id,
+ 'format_note': filed['profile_name'],
+ 'url': self._proto_relative_url(filed['url']),
+ 'width': int_or_none(filed.get('width')),
+ 'height': int_or_none(filed.get('height')),
+ 'filesize': int_or_none(filed.get('size')),
+ 'ext': filed.get('ext'),
+ 'source_preference': -1,
+ }
+ formats.append(f)
+
+ if filed.get('cdn_url'):
+ f = f.copy()
+ f['url'] = self._proto_relative_url(filed['cdn_url'], 'http:')
+ f['format_id'] = format_id + '-cdn'
+ f['source_preference'] = 1
+ formats.append(f)
+
+ if filed.get('html5_video_source'):
+ f = f.copy()
+ f['url'] = self._proto_relative_url(filed['html5_video_source'])
+ f['format_id'] = format_id + '-html5'
+ f['source_preference'] = 0
+ formats.append(f)
+ self._sort_formats(formats)
+
+ categories = [
+ t.get('text') for t in data.get('tags', []) if 'text' in t]
+
+ return {
+ 'id': video_id,
+ 'title': data['title'],
+ 'formats': formats,
+ 'description': data.get('description'),
+ 'timestamp': int_or_none(data.get('upload_time')),
+ 'thumbnail': self._proto_relative_url(data.get('thumbnail_url')),
+ 'uploader': data.get('author'),
+ 'duration': float_or_none(data.get('length')),
+ 'view_count': int_or_none(data.get('view_count')),
+ 'comment_count': int_or_none(data.get('comment_count')),
+ 'categories': categories,
+ }
diff --git a/hypervideo_dl/extractor/videa.py b/hypervideo_dl/extractor/videa.py
new file mode 100644
index 0000000..ab2c15c
--- /dev/null
+++ b/hypervideo_dl/extractor/videa.py
@@ -0,0 +1,173 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import re
+import string
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ mimetype2ext,
+ parse_codecs,
+ update_url_query,
+ xpath_element,
+ xpath_text,
+)
+from ..compat import (
+ compat_b64decode,
+ compat_ord,
+ compat_struct_pack,
+)
+
+
+class VideaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ videa(?:kid)?\.hu/
+ (?:
+ videok/(?:[^/]+/)*[^?#&]+-|
+ (?:videojs_)?player\?.*?\bv=|
+ player/v/
+ )
+ (?P<id>[^?#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
+ 'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
+ 'info_dict': {
+ 'id': '8YfIAjxwWGwT8HVQ',
+ 'ext': 'mp4',
+ 'title': 'Az őrült kígyász 285 kígyót enged szabadon',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 21,
+ },
+ }, {
+ 'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://videakid.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://videakid.hu/player?v=8YfIAjxwWGwT8HVQ',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
+ 'only_matching': True,
+ }]
+ _STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [url for _, url in re.findall(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
+ webpage)]
+
+ @staticmethod
+ def rc4(cipher_text, key):
+ res = b''
+
+ key_len = len(key)
+ S = list(range(256))
+
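+ # Key-scheduling algorithm (KSA): permute the state array S with the key.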
+ j = 0
+ for i in range(256):
+ j = (j + S[i] + ord(key[i % key_len])) % 256
+ S[i], S[j] = S[j], S[i]
+
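+ # Pseudo-random generation (PRGA): XOR the keystream with the ciphertext.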
+ i = 0
+ j = 0
+ for m in range(len(cipher_text)):
+ i = (i + 1) % 256
+ j = (j + S[i]) % 256
+ S[i], S[j] = S[j], S[i]
+ k = S[(S[i] + S[j]) % 256]
+ res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m]))
+
+ return res.decode()
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ query = {'v': video_id}
+ player_page = self._download_webpage(
+ 'https://videa.hu/player', video_id, query=query)
+
+ nonce = self._search_regex(
+ r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
+ l = nonce[:32]
+ s = nonce[32:]
+ result = ''
+ for i in range(0, 32):
+ result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
+
+ random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
+ query['_s'] = random_seed
+ query['_t'] = result[:16]
+
+ b64_info, handle = self._download_webpage_handle(
+ 'http://videa.hu/videaplayer_get_xml.php', video_id, query=query)
+ if b64_info.startswith('<?xml'):
+ info = self._parse_xml(b64_info, video_id)
+ else:
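+ # Encrypted payload: the RC4 key is the second half of the descrambled
+ # token + our random seed + a server-echoed header.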
+ key = result[16:] + random_seed + handle.headers['x-videa-xs']
+ info = self._parse_xml(self.rc4(
+ compat_b64decode(b64_info), key), video_id)
+
+ video = xpath_element(info, './video', 'video')
+ if video is None:
+ raise ExtractorError(xpath_text(
+ info, './error', 'error', fatal=True), expected=True)
+ sources = xpath_element(
+ info, './video_sources', 'sources', fatal=True)
+ hash_values = xpath_element(
+ info, './hash_values', 'hash values', fatal=True)
+
+ title = xpath_text(video, './title', fatal=True)
+
+ formats = []
+ for source in sources.findall('./video_source'):
+ source_url = source.text
+ source_name = source.get('name')
+ source_exp = source.get('exp')
+ if not (source_url and source_name and source_exp):
+ continue
+ hash_value = xpath_text(hash_values, 'hash_value_' + source_name)
+ if not hash_value:
+ continue
+ source_url = update_url_query(source_url, {
+ 'md5': hash_value,
+ 'expires': source_exp,
+ })
+ f = parse_codecs(source.get('codecs'))
+ f.update({
+ 'url': self._proto_relative_url(source_url),
+ 'ext': mimetype2ext(source.get('mimetype')) or 'mp4',
+ 'format_id': source.get('name'),
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ thumbnail = self._proto_relative_url(xpath_text(video, './poster_src'))
+
+ age_limit = None
+ is_adult = xpath_text(video, './is_adult_content', default=None)
+ if is_adult:
+ age_limit = 18 if is_adult == '1' else 0
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': int_or_none(xpath_text(video, './duration')),
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/videodetective.py b/hypervideo_dl/extractor/videodetective.py
new file mode 100644
index 0000000..fe70db7
--- /dev/null
+++ b/hypervideo_dl/extractor/videodetective.py
@@ -0,0 +1,29 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .internetvideoarchive import InternetVideoArchiveIE
+
+
+class VideoDetectiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.videodetective.com/movies/kick-ass-2/194487',
+ 'info_dict': {
+ 'id': '194487',
+ 'ext': 'mp4',
+ 'title': 'Kick-Ass 2',
+ 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ query = 'customerid=69249&publishedid=' + video_id
+ return self.url_result(
+ InternetVideoArchiveIE._build_json_url(query),
+ ie=InternetVideoArchiveIE.ie_key())
diff --git a/hypervideo_dl/extractor/videofyme.py b/hypervideo_dl/extractor/videofyme.py
new file mode 100644
index 0000000..cd3f50a
--- /dev/null
+++ b/hypervideo_dl/extractor/videofyme.py
@@ -0,0 +1,52 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class VideofyMeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
+ IE_NAME = 'videofy.me'
+
+ _TEST = {
+ 'url': 'http://www.videofy.me/thisisvideofyme/1100701',
+ 'md5': 'c77d700bdc16ae2e9f3c26019bd96143',
+ 'info_dict': {
+ 'id': '1100701',
+ 'ext': 'mp4',
+ 'title': 'This is VideofyMe',
+ 'description': '',
+ 'upload_date': '20130326',
+ 'timestamp': 1364288959,
+ 'uploader': 'VideofyMe',
+ 'uploader_id': 'thisisvideofyme',
+ 'view_count': int,
+ 'likes': int,
+ 'comment_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ config = self._download_json('http://vf-player-info-loader.herokuapp.com/%s.json' % video_id, video_id)['videoinfo']
+
+ video = config.get('video')
+ blog = config.get('blog', {})
+
+ return {
+ 'id': video_id,
+ 'title': video['title'],
+ 'url': video['sources']['source']['url'],
+ 'thumbnail': video.get('thumb'),
+ 'description': video.get('description'),
+ 'timestamp': parse_iso8601(video.get('date')),
+ 'uploader': blog.get('name'),
+ 'uploader_id': blog.get('identifier'),
+ 'view_count': int_or_none(self._search_regex(r'([0-9]+)', video.get('views') or '', 'view count', fatal=False)),
+ 'likes': int_or_none(video.get('likes')),
+ 'comment_count': int_or_none(video.get('nrOfComments')),
+ }
diff --git a/hypervideo_dl/extractor/videomore.py b/hypervideo_dl/extractor/videomore.py
new file mode 100644
index 0000000..e0c10aa
--- /dev/null
+++ b/hypervideo_dl/extractor/videomore.py
@@ -0,0 +1,322 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_str,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class VideomoreBaseIE(InfoExtractor):
+ _API_BASE_URL = 'https://more.tv/api/v3/web/'
+ _VALID_URL_BASE = r'https?://(?:videomore\.ru|more\.tv)/'
+
+ def _download_page_data(self, display_id):
+ return self._download_json(
+ self._API_BASE_URL + 'PageData', display_id, query={
+ 'url': '/' + display_id,
+ })['attributes']['response']['data']
+
+ def _track_url_result(self, track):
+ track_vod = track['trackVod']
+ video_url = track_vod.get('playerLink') or track_vod['link']
+ return self.url_result(
+ video_url, VideomoreIE.ie_key(), track_vod.get('hubId'))
+
+
+class VideomoreIE(InfoExtractor):
+ IE_NAME = 'videomore'
+ _VALID_URL = r'''(?x)
+ videomore:(?P<sid>\d+)$|
+ https?://
+ (?:
+ videomore\.ru/
+ (?:
+ embed|
+ [^/]+/[^/]+
+ )/|
+ (?:
+ (?:player\.)?videomore\.ru|
+ siren\.more\.tv/player
+ )/[^/]*\?.*?\btrack_id=|
+ odysseus\.more\.tv/player/(?P<partner_id>\d+)/
+ )
+ (?P<id>\d+)
+ (?:[/?#&]|\.(?:xml|json)|$)
+ '''
+ _TESTS = [{
+ 'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617',
+ 'md5': '44455a346edc0d509ac5b5a5b531dc35',
+ 'info_dict': {
+ 'id': '367617',
+ 'ext': 'flv',
+ 'title': 'Кино в деталях 5 сезон В гостях Алексей Чумаков и Юлия Ковальчук',
+ 'series': 'Кино в деталях',
+ 'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2910,
+ 'view_count': int,
+ 'comment_count': int,
+ 'age_limit': 16,
+ },
+ 'skip': 'The video is not available for viewing.',
+ }, {
+ 'url': 'http://videomore.ru/embed/259974',
+ 'info_dict': {
+ 'id': '259974',
+ 'ext': 'mp4',
+ 'title': 'Молодежка 2 сезон 40 серия',
+ 'series': 'Молодежка',
+ 'season': '2 сезон',
+ 'episode': '40 серия',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2789,
+ 'view_count': int,
+ 'age_limit': 16,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://videomore.ru/molodezhka/sezon_promo/341073',
+ 'info_dict': {
+ 'id': '341073',
+ 'ext': 'flv',
+ 'title': 'Промо Команда проиграла из-за Бакина?',
+ 'episode': 'Команда проиграла из-за Бакина?',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 29,
+ 'age_limit': 16,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'The video is not available for viewing.',
+ }, {
+ 'url': 'http://videomore.ru/elki_3?track_id=364623',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videomore.ru/embed/364623',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videomore.ru/video/tracks/364623.xml',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videomore.ru/video/tracks/364623.json',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videomore.ru/video/tracks/158031/quotes/33248',
+ 'only_matching': True,
+ }, {
+ 'url': 'videomore:367617',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://odysseus.more.tv/player/1788/352317',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://siren.more.tv/player/config?track_id=352317&partner_id=1788&user_token=',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1',
+ webpage)
+ if not mobj:
+ mobj = re.search(
+ r'<iframe[^>]+src=([\'"])(?P<url>https?://videomore\.ru/embed/\d+)',
+ webpage)
+
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('sid') or mobj.group('id')
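+ # partner_id comes from the URL path or query string; 97 appears to be
+ # the default web player id.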
+ partner_id = mobj.group('partner_id') or compat_parse_qs(
+ compat_urllib_parse_urlparse(url).query).get('partner_id', [None])[0] or '97'
+
+ item = self._download_json(
+ 'https://siren.more.tv/player/config', video_id, query={
+ 'partner_id': partner_id,
+ 'track_id': video_id,
+ })['data']['playlist']['items'][0]
+
+ title = item.get('title')
+ series = item.get('project_name')
+ season = item.get('season_name')
+ episode = item.get('episode_name')
+ if not title:
+ title = ' '.join(
+ v for v in (series, season, episode) if v)
+
+ streams = item.get('streams') or []
+ for protocol in ('DASH', 'HLS'):
+ stream_url = item.get(protocol.lower() + '_url')
+ if stream_url:
+ streams.append({'protocol': protocol, 'url': stream_url})
+
+ formats = []
+ for stream in streams:
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ protocol = stream.get('protocol')
+ if protocol == 'DASH':
+ formats.extend(self._extract_mpd_formats(
+ stream_url, video_id, mpd_id='dash', fatal=False))
+ elif protocol == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif protocol == 'MSS':
+ formats.extend(self._extract_ism_formats(
+ stream_url, video_id, ism_id='mss', fatal=False))
+
+ if not formats:
+ error = item.get('error')
+ if error:
+ if error in ('Данное видео недоступно для просмотра на территории этой страны', 'Данное видео доступно для просмотра только на территории России'):
+ self.raise_geo_restricted(countries=['RU'])
+ raise ExtractorError(error, expected=True)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'series': series,
+ 'season': season,
+ 'episode': episode,
+ 'thumbnail': item.get('thumbnail_url'),
+ 'duration': int_or_none(item.get('duration')),
+ 'view_count': int_or_none(item.get('views')),
+ 'age_limit': int_or_none(item.get('min_age')),
+ 'formats': formats,
+ }
+
+
+class VideomoreVideoIE(VideomoreBaseIE):
+ IE_NAME = 'videomore:video'
+ _VALID_URL = VideomoreBaseIE._VALID_URL_BASE + r'(?P<id>(?:(?:[^/]+/){2})?[^/?#&]+)(?:/*|[?#&].*?)$'
+ _TESTS = [{
+ # single video with og:video:iframe
+ 'url': 'http://videomore.ru/elki_3',
+ 'info_dict': {
+ 'id': '364623',
+ 'ext': 'flv',
+ 'title': 'Ёлки 3',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 5579,
+ 'age_limit': 6,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires logging in',
+ }, {
+ # season single series with og:video:iframe
+ 'url': 'http://videomore.ru/poslednii_ment/1_sezon/14_seriya',
+ 'info_dict': {
+ 'id': '352317',
+ 'ext': 'mp4',
+ 'title': 'Последний мент 1 сезон 14 серия',
+ 'series': 'Последний мент',
+ 'season': '1 сезон',
+ 'episode': '14 серия',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2464,
+ 'age_limit': 16,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://videomore.ru/sejchas_v_seti/serii_221-240/226_vypusk',
+ 'only_matching': True,
+ }, {
+ # single video without og:video:iframe
+ 'url': 'http://videomore.ru/marin_i_ego_druzya',
+ 'info_dict': {
+ 'id': '359073',
+ 'ext': 'flv',
+ 'title': '1 серия. Здравствуй, Аквавилль!',
+ 'description': 'md5:c6003179538b5d353e7bcd5b1372b2d7',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 754,
+ 'age_limit': 6,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'redirects to https://more.tv/'
+ }, {
+ 'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://more.tv/poslednii_ment/1_sezon/14_seriya',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if VideomoreIE.suitable(url) else super(VideomoreVideoIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ return self._track_url_result(self._download_page_data(display_id))
+
+
+class VideomoreSeasonIE(VideomoreBaseIE):
+ IE_NAME = 'videomore:season'
+ _VALID_URL = VideomoreBaseIE._VALID_URL_BASE + r'(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$'
+ _TESTS = [{
+ 'url': 'http://videomore.ru/molodezhka/film_o_filme',
+ 'info_dict': {
+ 'id': 'molodezhka/film_o_filme',
+ 'title': 'Фильм о фильме',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://more.tv/molodezhka/film_o_filme',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if (VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url))
+ else super(VideomoreSeasonIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ season = self._download_page_data(display_id)
+ season_id = compat_str(season['id'])
+ tracks = self._download_json(
+ self._API_BASE_URL + 'seasons/%s/tracks' % season_id,
+ season_id)['data']
+ entries = []
+ for track in tracks:
+ entries.append(self._track_url_result(track))
+ return self.playlist_result(entries, display_id, season.get('title'))
diff --git a/hypervideo_dl/extractor/videopress.py b/hypervideo_dl/extractor/videopress.py
new file mode 100644
index 0000000..6376ff0
--- /dev/null
+++ b/hypervideo_dl/extractor/videopress.py
@@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ parse_age_limit,
+ qualities,
+ random_birthday,
+ unified_timestamp,
+ urljoin,
+)
+
+
+class VideoPressIE(InfoExtractor):
+ _ID_REGEX = r'[\da-zA-Z]{8}'
+ _PATH_REGEX = r'video(?:\.word)?press\.com/embed/'
+ _VALID_URL = r'https?://%s(?P<id>%s)' % (_PATH_REGEX, _ID_REGEX)
+ _TESTS = [{
+ 'url': 'https://videopress.com/embed/kUJmAcSf',
+ 'md5': '706956a6c875873d51010921310e4bc6',
+ 'info_dict': {
+ 'id': 'kUJmAcSf',
+ 'ext': 'mp4',
+ 'title': 'VideoPress Demo',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 634.6,
+ 'timestamp': 1434983935,
+ 'upload_date': '20150622',
+ 'age_limit': 0,
+ },
+ }, {
+ # 17+, requires birth_* params
+ 'url': 'https://videopress.com/embed/iH3gstfZ',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.wordpress.com/embed/kUJmAcSf',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+src=["\']((?:https?://)?%s%s)' % (VideoPressIE._PATH_REGEX, VideoPressIE._ID_REGEX),
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ query = random_birthday('birth_year', 'birth_month', 'birth_day')
+ query['fields'] = 'description,duration,file_url_base,files,height,original,poster,rating,title,upload_date,width'
+ video = self._download_json(
+ 'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
+ video_id, query=query)
+
+ title = video['title']
+
+ file_url_base = video.get('file_url_base') or {}
+ base_url = file_url_base.get('https') or file_url_base.get('http')
+
+ QUALITIES = ('std', 'dvd', 'hd')
+ quality = qualities(QUALITIES)
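+ # The untranscoded 'original' added below gets quality len(QUALITIES),
+ # ranking it above std/dvd/hd.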
+
+ formats = []
+ for format_id, f in (video.get('files') or {}).items():
+ if not isinstance(f, dict):
+ continue
+ for ext, path in f.items():
+ if ext in ('mp4', 'ogg'):
+ formats.append({
+ 'url': urljoin(base_url, path),
+ 'format_id': '%s-%s' % (format_id, ext),
+ 'ext': determine_ext(path, ext),
+ 'quality': quality(format_id),
+ })
+ original_url = video.get('original')
+ if original_url:
+ formats.append({
+ 'url': original_url,
+ 'format_id': 'original',
+ 'quality': len(QUALITIES),
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': video.get('poster'),
+ 'duration': float_or_none(video.get('duration'), 1000),
+ 'timestamp': unified_timestamp(video.get('upload_date')),
+ 'age_limit': parse_age_limit(video.get('rating')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/vidio.py b/hypervideo_dl/extractor/vidio.py
new file mode 100644
index 0000000..b1243e8
--- /dev/null
+++ b/hypervideo_dl/extractor/vidio.py
@@ -0,0 +1,89 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ str_or_none,
+ strip_or_none,
+ try_get,
+)
+
+
+class VidioIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
+ 'md5': 'cd2801394afc164e9775db6a140b91fe',
+ 'info_dict': {
+ 'id': '165683',
+ 'display_id': 'dj_ambred-booyah-live-2015',
+ 'ext': 'mp4',
+ 'title': 'DJ_AMBRED - Booyah (Live 2015)',
+ 'description': 'md5:27dc15f819b6a78a626490881adbadf8',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 149,
+ 'like_count': int,
+ 'uploader': 'TWELVE Pic',
+ 'timestamp': 1444902800,
+ 'upload_date': '20151015',
+ 'uploader_id': 'twelvepictures',
+ 'channel': 'Cover Music Video',
+ 'channel_id': '280236',
+ 'view_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'tags': 'count:4',
+ },
+ }, {
+ 'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
+ 'only_matching': True,
+ }]
+
+ def _real_initialize(self):
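+ # An empty POST to /auth returns the API key sent as X-API-KEY below.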
+ self._api_key = self._download_json(
+ 'https://www.vidio.com/auth', None, data=b'')['api_key']
+
+ def _real_extract(self, url):
+ video_id, display_id = re.match(self._VALID_URL, url).groups()
+ data = self._download_json(
+ 'https://api.vidio.com/videos/' + video_id, display_id, headers={
+ 'Content-Type': 'application/vnd.api+json',
+ 'X-API-KEY': self._api_key,
+ })
+ video = data['videos'][0]
+ title = video['title'].strip()
+
+ formats = self._extract_m3u8_formats(
+ data['clips'][0]['hls_url'], display_id, 'mp4', 'm3u8_native')
+ self._sort_formats(formats)
+
+ get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
+ channel = get_first('channel')
+ user = get_first('user')
+ username = user.get('username')
+ get_count = lambda x: int_or_none(video.get('total_' + x))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': strip_or_none(video.get('description')),
+ 'thumbnail': video.get('image_url_medium'),
+ 'duration': int_or_none(video.get('duration')),
+ 'like_count': get_count('likes'),
+ 'formats': formats,
+ 'uploader': user.get('name'),
+ 'timestamp': parse_iso8601(video.get('created_at')),
+ 'uploader_id': username,
+ 'uploader_url': 'https://www.vidio.com/@' + username if username else None,
+ 'channel': channel.get('name'),
+ 'channel_id': str_or_none(channel.get('id')),
+ 'view_count': get_count('view_count'),
+ 'dislike_count': get_count('dislikes'),
+ 'comment_count': get_count('comments'),
+ 'tags': video.get('tag_list'),
+ }
diff --git a/hypervideo_dl/extractor/vidlii.py b/hypervideo_dl/extractor/vidlii.py
new file mode 100644
index 0000000..f477425
--- /dev/null
+++ b/hypervideo_dl/extractor/vidlii.py
@@ -0,0 +1,125 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ get_element_by_id,
+ int_or_none,
+ strip_or_none,
+ unified_strdate,
+ urljoin,
+)
+
+
+class VidLiiIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vidlii\.com/(?:watch|embed)\?.*?\bv=(?P<id>[0-9A-Za-z_-]{11})'
+ _TESTS = [{
+ 'url': 'https://www.vidlii.com/watch?v=tJluaH4BJ3v',
+ 'md5': '9bf7d1e005dfa909b6efb0a1ff5175e2',
+ 'info_dict': {
+ 'id': 'tJluaH4BJ3v',
+ 'ext': 'mp4',
+ 'title': 'Vidlii is against me',
+ 'description': 'md5:fa3f119287a2bfb922623b52b1856145',
+ 'thumbnail': r're:https://.*\.jpg',
+ 'uploader': 'APPle5auc31995',
+ 'uploader_url': 'https://www.vidlii.com/user/APPle5auc31995',
+ 'upload_date': '20171107',
+ 'duration': 212,
+ 'view_count': int,
+ 'comment_count': int,
+ 'average_rating': float,
+ 'categories': ['News & Politics'],
+ 'tags': ['Vidlii', 'Jan', 'Videogames'],
+ }
+ }, {
+ 'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://www.vidlii.com/watch?v=%s' % video_id, video_id)
+
+ video_url = self._search_regex(
+ r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', webpage,
+ 'video url', group='url')
+
+ title = self._search_regex(
+ (r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
+ 'title')
+
+ description = self._html_search_meta(
+ ('description', 'twitter:description'), webpage,
+ default=None) or strip_or_none(
+ get_element_by_id('des_text', webpage))
+
+ thumbnail = self._html_search_meta(
+ 'twitter:image', webpage, default=None)
+ if not thumbnail:
+ thumbnail_path = self._search_regex(
+ r'img\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'thumbnail', fatal=False, group='url')
+ if thumbnail_path:
+ thumbnail = urljoin(url, thumbnail_path)
+
+ uploader = self._search_regex(
+ r'<div[^>]+class=["\']wt_person[^>]+>\s*<a[^>]+\bhref=["\']/user/[^>]+>([^<]+)',
+ webpage, 'uploader', fatal=False)
+ uploader_url = 'https://www.vidlii.com/user/%s' % uploader if uploader else None
+
+ upload_date = unified_strdate(self._html_search_meta(
+ 'datePublished', webpage, default=None) or self._search_regex(
+ r'<date>([^<]+)', webpage, 'upload date', fatal=False))
+
+ duration = int_or_none(self._html_search_meta(
+ 'video:duration', webpage, 'duration',
+ default=None) or self._search_regex(
+ r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+
+ view_count = int_or_none(self._search_regex(
+ (r'<strong>(\d+)</strong> views',
+ r'Views\s*:\s*<strong>(\d+)</strong>'),
+ webpage, 'view count', fatal=False))
+
+ comment_count = int_or_none(self._search_regex(
+ (r'<span[^>]+id=["\']cmt_num[^>]+>(\d+)',
+ r'Comments\s*:\s*<strong>(\d+)'),
+ webpage, 'comment count', fatal=False))
+
+ average_rating = float_or_none(self._search_regex(
+ r'rating\s*:\s*([\d.]+)', webpage, 'average rating', fatal=False))
+
+ category = self._html_search_regex(
+ r'<div>Category\s*:\s*</div>\s*<div>\s*<a[^>]+>([^<]+)', webpage,
+ 'category', fatal=False)
+ categories = [category] if category else None
+
+ tags = [
+ strip_or_none(tag)
+ for tag in re.findall(
+ r'<a[^>]+\bhref=["\']/results\?.*?q=[^>]*>([^<]+)',
+ webpage) if strip_or_none(tag)
+ ] or None
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_url': uploader_url,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'average_rating': average_rating,
+ 'categories': categories,
+ 'tags': tags,
+ }
diff --git a/hypervideo_dl/extractor/vidme.py b/hypervideo_dl/extractor/vidme.py
new file mode 100644
index 0000000..174e69c
--- /dev/null
+++ b/hypervideo_dl/extractor/vidme.py
@@ -0,0 +1,295 @@
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ parse_iso8601,
+ url_or_none,
+)
+
+
+class VidmeIE(InfoExtractor):
+ IE_NAME = 'vidme'
+ _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)'
+ _TESTS = [{
+ 'url': 'https://vid.me/QNB',
+ 'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
+ 'info_dict': {
+ 'id': 'QNB',
+ 'ext': 'mp4',
+ 'title': 'Fishing for piranha - the easy way',
+ 'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1406313244,
+ 'upload_date': '20140725',
+ 'age_limit': 0,
+ 'duration': 119.92,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ }, {
+ 'url': 'https://vid.me/Gc6M',
+ 'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
+ 'info_dict': {
+ 'id': 'Gc6M',
+ 'ext': 'mp4',
+ 'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1441211642,
+ 'upload_date': '20150902',
+ 'uploader': 'SunshineM',
+ 'uploader_id': '3552827',
+ 'age_limit': 0,
+ 'duration': 223.72,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # tests uploader field
+ 'url': 'https://vid.me/4Iib',
+ 'info_dict': {
+ 'id': '4Iib',
+ 'ext': 'mp4',
+ 'title': 'The Carver',
+ 'description': 'md5:e9c24870018ae8113be936645b93ba3c',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1433203629,
+ 'upload_date': '20150602',
+ 'uploader': 'Thomas',
+ 'uploader_id': '109747',
+ 'age_limit': 0,
+ 'duration': 97.86,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
+ 'url': 'https://vid.me/e/Wmur',
+ 'info_dict': {
+ 'id': 'Wmur',
+ 'ext': 'mp4',
+ 'title': 'naked smoking & stretching',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1430931613,
+ 'upload_date': '20150506',
+ 'uploader': 'naked-yogi',
+ 'uploader_id': '1638622',
+ 'age_limit': 18,
+ 'duration': 653.27,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # nsfw, user-disabled
+ 'url': 'https://vid.me/dzGJ',
+ 'only_matching': True,
+ }, {
+ # suspended
+ 'url': 'https://vid.me/Ox3G',
+ 'only_matching': True,
+ }, {
+ # deleted
+ 'url': 'https://vid.me/KTPm',
+ 'only_matching': True,
+ }, {
+ # no formats in the API response
+ 'url': 'https://vid.me/e5g',
+ 'info_dict': {
+ 'id': 'e5g',
+ 'ext': 'mp4',
+ 'title': 'Video upload (e5g)',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1401480195,
+ 'upload_date': '20140530',
+ 'uploader': None,
+ 'uploader_id': None,
+ 'age_limit': 0,
+ 'duration': 483,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ try:
+ response = self._download_json(
+ 'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ response = self._parse_json(e.cause.read(), video_id)
+ else:
+ raise
+
+ error = response.get('error')
+ if error:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error), expected=True)
+
+ video = response['video']
+
+ if video.get('state') == 'deleted':
+ raise ExtractorError(
+ 'Vidme said: Sorry, this video has been deleted.',
+ expected=True)
+
+ if video.get('state') in ('user-disabled', 'suspended'):
+ raise ExtractorError(
+ 'Vidme said: This video has been suspended either due to a copyright claim, '
+ 'or for violating the terms of use.',
+ expected=True)
+
+ formats = []
+ for f in video.get('formats', []):
+ format_url = url_or_none(f.get('uri'))
+ if not format_url:
+ continue
+ format_type = f.get('type')
+ if format_type == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ elif format_type == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'format_id': f.get('type'),
+ 'url': format_url,
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'preference': 0 if f.get('type', '').endswith(
+ 'clip') else 1,
+ })
+
+ if not formats and video.get('complete_url'):
+ formats.append({
+ 'url': video.get('complete_url'),
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ })
+
+ self._sort_formats(formats)
+
+ title = video['title']
+ description = video.get('description')
+ thumbnail = video.get('thumbnail_url')
+ timestamp = parse_iso8601(video.get('date_created'), ' ')
+ uploader = video.get('user', {}).get('username')
+ uploader_id = video.get('user', {}).get('user_id')
+ age_limit = 18 if video.get('nsfw') is True else 0
+ duration = float_or_none(video.get('duration'))
+ view_count = int_or_none(video.get('view_count'))
+ like_count = int_or_none(video.get('likes_count'))
+ comment_count = int_or_none(video.get('comment_count'))
+
+ return {
+ 'id': video_id,
+ 'title': title or 'Video upload (%s)' % video_id,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'age_limit': age_limit,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ }
+
+
+class VidmeListBaseIE(InfoExtractor):
+ # Max possible limit according to https://docs.vid.me/#api-Videos-List
+ _LIMIT = 100
+
+ def _entries(self, user_id, user_name):
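+ # Offset-based pagination: stop on an empty page or once
+ # limit * pages reaches the reported total.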
+ for page_num in itertools.count(1):
+ page = self._download_json(
+ 'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
+ % (self._API_ITEM, user_id, self._LIMIT, (page_num - 1) * self._LIMIT),
+ user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num))
+
+ videos = page.get('videos', [])
+ if not videos:
+ break
+
+ for video in videos:
+ video_url = video.get('full_url') or video.get('embed_url')
+ if video_url:
+ yield self.url_result(video_url, VidmeIE.ie_key())
+
+ total = int_or_none(page.get('page', {}).get('total'))
+ if total and self._LIMIT * page_num >= total:
+ break
+
+ def _real_extract(self, url):
+ user_name = self._match_id(url)
+
+ user_id = self._download_json(
+ 'https://api.vid.me/userByUsername?username=%s' % user_name,
+ user_name)['user']['user_id']
+
+ return self.playlist_result(
+ self._entries(user_id, user_name), user_id,
+ '%s - %s' % (user_name, self._TITLE))
+
+
+class VidmeUserIE(VidmeListBaseIE):
+ IE_NAME = 'vidme:user'
+ _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})(?!/likes)(?:[^\da-zA-Z_-]|$)'
+ _API_ITEM = 'list'
+ _TITLE = 'Videos'
+ _TESTS = [{
+ 'url': 'https://vid.me/MasakoX',
+ 'info_dict': {
+ 'id': '16112341',
+ 'title': 'MasakoX - %s' % _TITLE,
+ },
+ 'playlist_mincount': 191,
+ }, {
+ 'url': 'https://vid.me/unsQuare_netWork',
+ 'only_matching': True,
+ }]
+
+
+class VidmeUserLikesIE(VidmeListBaseIE):
+ IE_NAME = 'vidme:user:likes'
+ _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})/likes'
+ _API_ITEM = 'likes'
+ _TITLE = 'Likes'
+ _TESTS = [{
+ 'url': 'https://vid.me/ErinAlexis/likes',
+ 'info_dict': {
+ 'id': '6483530',
+ 'title': 'ErinAlexis - %s' % _TITLE,
+ },
+ 'playlist_mincount': 415,
+ }, {
+ 'url': 'https://vid.me/Kaleidoscope-Ish/likes',
+ 'only_matching': True,
+ }]
diff --git a/hypervideo_dl/extractor/vier.py b/hypervideo_dl/extractor/vier.py
new file mode 100644
index 0000000..dbd5ba9
--- /dev/null
+++ b/hypervideo_dl/extractor/vier.py
@@ -0,0 +1,264 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+ urlencode_postdata,
+ int_or_none,
+ unified_strdate,
+)
+
+
+class VierIE(InfoExtractor):
+ IE_NAME = 'vier'
+ IE_DESC = 'vier.be and vijf.be'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?(?P<site>vier|vijf)\.be/
+ (?:
+ (?:
+ [^/]+/videos|
+ video(?:/[^/]+)*
+ )/
+ (?P<display_id>[^/]+)(?:/(?P<id>\d+))?|
+ (?:
+ video/v3/embed|
+ embed/video/public
+ )/(?P<embed_id>\d+)
+ )
+ '''
+ _NETRC_MACHINE = 'vier'
+ _TESTS = [{
+ 'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
+ 'md5': 'e4ae2054a6b040ef1e289e20d111b46e',
+ 'info_dict': {
+ 'id': '16129',
+ 'display_id': 'het-wordt-warm-de-moestuin',
+ 'ext': 'mp4',
+ 'title': 'Het wordt warm in De Moestuin',
+ 'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
+ 'upload_date': '20121025',
+ 'series': 'Plan B',
+ 'tags': ['De Moestuin', 'Moestuin', 'meisjes', 'Tomaat', 'Wim', 'Droom'],
+ },
+ }, {
+ 'url': 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614',
+ 'info_dict': {
+ 'id': '2561614',
+ 'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas',
+ 'ext': 'mp4',
+ 'title': 'md5:84f45fe48b8c1fa296a7f6d208d080a7',
+ 'description': 'md5:0356d4981e58b8cbee19355cbd51a8fe',
+ 'upload_date': '20170228',
+ 'series': 'Temptation Island',
+ 'tags': list,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.vier.be/janigaat/videos/jani-gaat-naar-tokio-aflevering-4/2674839',
+ 'info_dict': {
+ 'id': '2674839',
+ 'display_id': 'jani-gaat-naar-tokio-aflevering-4',
+ 'ext': 'mp4',
+ 'title': 'Jani gaat naar Tokio - Aflevering 4',
+ 'description': 'md5:aa8d611541db6ae9e863125704511f88',
+ 'upload_date': '20170501',
+ 'series': 'Jani gaat',
+ 'episode_number': 4,
+ 'tags': ['Jani Gaat', 'Volledige Aflevering'],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires account credentials',
+ }, {
+ # Requires account credentials, but extraction is bypassed via the
+ # v3/embed page, which exposes no metadata
+ 'url': 'http://www.vier.be/janigaat/videos/jani-gaat-naar-tokio-aflevering-4/2674839',
+ 'info_dict': {
+ 'id': '2674839',
+ 'display_id': 'jani-gaat-naar-tokio-aflevering-4',
+ 'ext': 'mp4',
+ 'title': 'jani-gaat-naar-tokio-aflevering-4',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Log in to extract metadata'],
+ }, {
+ # Without video id in URL
+ 'url': 'http://www.vier.be/planb/videos/dit-najaar-plan-b',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.vier.be/video/v3/embed/16129',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.vijf.be/embed/video/public/4093',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.vier.be/video/blockbusters/in-juli-en-augustus-summer-classics',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.vier.be/video/achter-de-rug/2017/achter-de-rug-seizoen-1-aflevering-6',
+ 'only_matching': True,
+ }]
+
+ def _real_initialize(self):
+ self._logged_in = False
+
+ def _login(self, site):
+ username, password = self._get_login_info()
+ if username is None or password is None:
+ return
+
+ login_page = self._download_webpage(
+ 'http://www.%s.be/user/login' % site,
+ None, note='Logging in', errnote='Unable to log in',
+ data=urlencode_postdata({
+ 'form_id': 'user_login',
+ 'name': username,
+ 'pass': password,
+ }),
+ headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+ login_error = self._html_search_regex(
+ r'(?s)<div class="messages error">\s*<div>\s*<h2.+?</h2>(.+?)<',
+ login_page, 'login error', default=None)
+ if login_error:
+ self.report_warning('Unable to log in: %s' % login_error)
+ else:
+ self._logged_in = True
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ embed_id = mobj.group('embed_id')
+ display_id = mobj.group('display_id') or embed_id
+ video_id = mobj.group('id') or embed_id
+ site = mobj.group('site')
+
+ if not self._logged_in:
+ self._login(site)
+
+ webpage = self._download_webpage(url, display_id)
+
+ if r'id="user-login"' in webpage:
+ self.report_warning(
+ 'Log in to extract metadata', video_id=display_id)
+ webpage = self._download_webpage(
+ 'http://www.%s.be/video/v3/embed/%s' % (site, video_id),
+ display_id)
+
+ video_id = self._search_regex(
+ [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'],
+ webpage, 'video id', default=video_id or display_id)
+
+ playlist_url = self._search_regex(
+ r'data-file=(["\'])(?P<url>(?:https?:)?//[^/]+/.+?\.m3u8.*?)\1',
+ webpage, 'm3u8 url', default=None, group='url')
+
+ if not playlist_url:
+ application = self._search_regex(
+ [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
+ webpage, 'application', default=site + '_vod')
+ filename = self._search_regex(
+ [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
+ webpage, 'filename')
+ playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename)
+
+ formats = self._extract_wowza_formats(
+ playlist_url, display_id, skip_protocols=['dash'])
+ self._sort_formats(formats)
+
+ title = self._og_search_title(webpage, default=display_id)
+ description = self._html_search_regex(
+ r'(?s)<div\b[^>]+\bclass=(["\'])[^>]*?\bfield-type-text-with-summary\b[^>]*?\1[^>]*>.*?<p>(?P<value>.+?)</p>',
+ webpage, 'description', default=None, group='value')
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+ upload_date = unified_strdate(self._html_search_regex(
+ r'(?s)<div\b[^>]+\bclass=(["\'])[^>]*?\bfield-name-post-date\b[^>]*?\1[^>]*>.*?(?P<value>\d{2}/\d{2}/\d{4})',
+ webpage, 'upload date', default=None, group='value'))
+
+ series = self._search_regex(
+ r'data-program=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+ 'series', default=None, group='value')
+ episode_number = int_or_none(self._search_regex(
+ r'(?i)aflevering (\d+)', title, 'episode number', default=None))
+ tags = re.findall(r'<a\b[^>]+\bhref=["\']/tags/[^>]+>([^<]+)<', webpage)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'series': series,
+ 'episode_number': episode_number,
+ 'tags': tags,
+ 'formats': formats,
+ }
+
+
+class VierVideosIE(InfoExtractor):
+ IE_NAME = 'vier:videos'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
+ _TESTS = [{
+ 'url': 'http://www.vier.be/demoestuin/videos',
+ 'info_dict': {
+ 'id': 'demoestuin',
+ },
+ 'playlist_mincount': 153,
+ }, {
+ 'url': 'http://www.vijf.be/temptationisland/videos',
+ 'info_dict': {
+ 'id': 'temptationisland',
+ },
+ 'playlist_mincount': 159,
+ }, {
+ 'url': 'http://www.vier.be/demoestuin/videos?page=6',
+ 'info_dict': {
+ 'id': 'demoestuin-page6',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ 'url': 'http://www.vier.be/demoestuin/videos?page=7',
+ 'info_dict': {
+ 'id': 'demoestuin-page7',
+ },
+ 'playlist_mincount': 13,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ program = mobj.group('program')
+ site = mobj.group('site')
+
+ page_id = mobj.group('page')
+ if page_id:
+ page_id = int(page_id)
+ start_page = page_id
+ playlist_id = '%s-page%d' % (program, page_id)
+ else:
+ start_page = 0
+ playlist_id = program
+
+ entries = []
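+ # With an explicit ?page= only that page is fetched; otherwise keep
+ # paging while a 'Meer' (more) link is present.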
+ for current_page_id in itertools.count(start_page):
+ current_page = self._download_webpage(
+ 'http://www.%s.be/%s/videos?page=%d' % (site, program, current_page_id),
+ program,
+ 'Downloading page %d' % (current_page_id + 1))
+ page_entries = [
+ self.url_result('http://www.' + site + '.be' + video_url, 'Vier')
+ for video_url in re.findall(
+ r'<h[23]><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
+ entries.extend(page_entries)
+ if page_id or '>Meer<' not in current_page:
+ break
+
+ return self.playlist_result(entries, playlist_id)
diff --git a/hypervideo_dl/extractor/viewlift.py b/hypervideo_dl/extractor/viewlift.py
new file mode 100644
index 0000000..d6b92b1
--- /dev/null
+++ b/hypervideo_dl/extractor/viewlift.py
@@ -0,0 +1,250 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+)
+
+
+class ViewLiftBaseIE(InfoExtractor):
+ _API_BASE = 'https://prod-api.viewlift.com/'
+ _DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
+ _SITE_MAP = {
+ 'ftfnext': 'lax',
+ 'funnyforfree': 'snagfilms',
+ 'hoichoi': 'hoichoitv',
+ 'kiddovid': 'snagfilms',
+ 'laxsportsnetwork': 'lax',
+ 'legapallacanestro': 'lnp',
+ 'marquee': 'marquee-tv',
+ 'monumentalsportsnetwork': 'monumental-network',
+ 'moviespree': 'bingeflix',
+ 'pflmma': 'pfl',
+ 'snagxtreme': 'snagfilms',
+ 'theidentitytb': 'tampabay',
+ 'vayafilm': 'snagfilms',
+ }
+ _TOKENS = {}
+
+ def _call_api(self, site, path, video_id, query):
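+ # Tokens are cached per site: sign in when netrc credentials exist,
+ # otherwise request an anonymous token.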
+ token = self._TOKENS.get(site)
+ if not token:
+ token_query = {'site': site}
+ email, password = self._get_login_info(netrc_machine=site)
+ if email:
+ resp = self._download_json(
+ self._API_BASE + 'identity/signin', video_id,
+ 'Logging in', query=token_query, data=json.dumps({
+ 'email': email,
+ 'password': password,
+ }).encode())
+ else:
+ resp = self._download_json(
+ self._API_BASE + 'identity/anonymous-token', video_id,
+ 'Downloading authorization token', query=token_query)
+ self._TOKENS[site] = token = resp['authorizationToken']
+ return self._download_json(
+ self._API_BASE + path, video_id,
+ headers={'Authorization': token}, query=query)
+
+
+class ViewLiftEmbedIE(ViewLiftBaseIE):
+ IE_NAME = 'viewlift:embed'
+ _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?P<domain>%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
+ _TESTS = [{
+ 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
+ 'md5': '2924e9215c6eff7a55ed35b72276bd93',
+ 'info_dict': {
+ 'id': '74849a00-85a9-11e1-9660-123139220831',
+ 'ext': 'mp4',
+ 'title': '#whilewewatch',
+ 'description': 'md5:b542bef32a6f657dadd0df06e26fb0c8',
+ 'timestamp': 1334350096,
+ 'upload_date': '20120413',
+ }
+ }, {
+ # invalid labels, 360p is better than 480p
+ 'url': 'http://www.snagfilms.com/embed/player?filmId=17ca0950-a74a-11e0-a92a-0026bb61d036',
+ 'md5': '882fca19b9eb27ef865efeeaed376a48',
+ 'info_dict': {
+ 'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
+ 'ext': 'mp4',
+ 'title': 'Life in Limbo',
+ },
+ 'skip': 'The video does not exist',
+ }, {
+ 'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX,
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ domain, film_id = re.match(self._VALID_URL, url).groups()
+ site = domain.split('.')[-2]
+ if site in self._SITE_MAP:
+ site = self._SITE_MAP[site]
+ try:
+ content_data = self._call_api(
+ site, 'entitlement/video/status', film_id, {
+ 'id': film_id
+ })['video']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ error_message = self._parse_json(e.cause.read().decode(), film_id).get('errorMessage')
+ if error_message == 'User does not have a valid subscription or has not purchased this content.':
+ self.raise_login_required()
+ raise ExtractorError(error_message, expected=True)
+ raise
+ gist = content_data['gist']
+ title = gist['title']
+ video_assets = content_data['streamingInfo']['videoAssets']
+
+ formats = []
+ mpeg_video_assets = video_assets.get('mpeg') or []
+ for video_asset in mpeg_video_assets:
+ video_asset_url = video_asset.get('url')
+ if not video_asset_url:
+ continue
+ bitrate = int_or_none(video_asset.get('bitrate'))
+ height = int_or_none(self._search_regex(
+ r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
+ 'height', default=None))
+ formats.append({
+ 'url': video_asset_url,
+ 'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
+ 'tbr': bitrate,
+ 'height': height,
+ 'vcodec': video_asset.get('codec'),
+ })
+
+ hls_url = video_assets.get('hls')
+ if hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ self._sort_formats(formats, ('height', 'tbr', 'format_id'))
+
+ info = {
+ 'id': film_id,
+ 'title': title,
+ 'description': gist.get('description'),
+ 'thumbnail': gist.get('videoImageUrl'),
+ 'duration': int_or_none(gist.get('runtime')),
+ 'age_limit': parse_age_limit(content_data.get('parentalRating')),
+ 'timestamp': int_or_none(gist.get('publishDate'), 1000),
+ 'formats': formats,
+ }
+ for k in ('categories', 'tags'):
+ info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
+ return info
+
+
+class ViewLiftIE(ViewLiftBaseIE):
+ IE_NAME = 'viewlift'
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?P<path>(?:/(?:films/title|show|(?:news/)?videos?|watch))?/(?P<id>[^?#]+))' % ViewLiftBaseIE._DOMAINS_REGEX
+ _TESTS = [{
+ 'url': 'http://www.snagfilms.com/films/title/lost_for_life',
+ 'md5': '19844f897b35af219773fd63bdec2942',
+ 'info_dict': {
+ 'id': '0000014c-de2f-d5d6-abcf-ffef58af0017',
+ 'display_id': 'lost_for_life',
+ 'ext': 'mp4',
+ 'title': 'Lost for Life',
+ 'description': 'md5:ea10b5a50405ae1f7b5269a6ec594102',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 4489,
+ 'categories': 'mincount:3',
+ 'age_limit': 14,
+ 'upload_date': '20150421',
+ 'timestamp': 1429656820,
+ }
+ }, {
+ 'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
+ 'md5': 'e6292e5b837642bbda82d7f8bf3fbdfd',
+ 'info_dict': {
+ 'id': '00000145-d75c-d96e-a9c7-ff5c67b20000',
+ 'display_id': 'the_world_cut_project/india',
+ 'ext': 'mp4',
+ 'title': 'India',
+ 'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 979,
+ 'timestamp': 1399478279,
+ 'upload_date': '20140507',
+ }
+ }, {
+ 'url': 'http://main.snagfilms.com/augie_alone/s_2_ep_12_love',
+ 'info_dict': {
+ 'id': '00000148-7b53-de26-a9fb-fbf306f70020',
+ 'display_id': 'augie_alone/s_2_ep_12_love',
+ 'ext': 'mp4',
+ 'title': 'S. 2 Ep. 12 - Love',
+ 'description': 'Augie finds love.',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 107,
+ 'upload_date': '20141012',
+ 'timestamp': 1413129540,
+ 'age_limit': 17,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://main.snagfilms.com/films/title/the_freebie',
+ 'only_matching': True,
+ }, {
+ # Film is not playable in your area.
+ 'url': 'http://www.snagfilms.com/films/title/inside_mecca',
+ 'only_matching': True,
+ }, {
+ # Film is not available.
+ 'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.winnersview.com/videos/the-good-son',
+ 'only_matching': True,
+ }, {
+ # Was once Kaltura embed
+ 'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.marquee.tv/watch/sadlerswells-sacredmonsters',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ domain, path, display_id = re.match(self._VALID_URL, url).groups()
+ site = domain.split('.')[-2]
+ if site in self._SITE_MAP:
+ site = self._SITE_MAP[site]
+ modules = self._call_api(
+ site, 'content/pages', display_id, {
+ 'includeContent': 'true',
+ 'moduleOffset': 1,
+ 'path': path,
+ 'site': site,
+ })['modules']
+ film_id = next(m['contentData'][0]['gist']['id'] for m in modules if m.get('moduleType') == 'VideoDetailModule')
+ return {
+ '_type': 'url_transparent',
+ 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
+ 'id': film_id,
+ 'display_id': display_id,
+ 'ie_key': 'ViewLiftEmbed',
+ }
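+ # Note (illustrative, not part of the original change): a 'url_transparent'
+ # result delegates extraction to ViewLiftEmbedIE while keeping the metadata
+ # gathered here; non-None fields such as 'display_id' set above take
+ # precedence over whatever the embed extractor returns. For example, for
+ # display_id 'lost_for_life' the delegated URL would be
+ # 'http://www.snagfilms.com/embed/player?filmId=<film_id>'.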
diff --git a/hypervideo_dl/extractor/viidea.py b/hypervideo_dl/extractor/viidea.py
new file mode 100644
index 0000000..a0abbae
--- /dev/null
+++ b/hypervideo_dl/extractor/viidea.py
@@ -0,0 +1,202 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ js_to_json,
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class ViideaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
+ videolectures\.net|
+ flexilearn\.viidea\.net|
+ presentations\.ocwconsortium\.org|
+ video\.travel-zoom\.si|
+ video\.pomp-forum\.si|
+ tv\.nil\.si|
+ video\.hekovnik\.com|
+ video\.szko\.si|
+ kpk\.viidea\.com|
+ inside\.viidea\.net|
+ video\.kiberpipa\.org|
+ bvvideo\.si|
+ kongres\.viidea\.net|
+ edemokracija\.viidea\.com
+ )(?:/lecture)?/(?P<id>[^/]+)(?:/video/(?P<part>\d+))?/*(?:[#?].*)?$'''
+
+ _TESTS = [{
+ 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
+ 'info_dict': {
+ 'id': '20171',
+ 'display_id': 'promogram_igor_mekjavic_eng',
+ 'ext': 'mp4',
+ 'title': 'Automatics, robotics and biocybernetics',
+ 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'timestamp': 1372349289,
+ 'upload_date': '20130627',
+ 'duration': 565,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # video with invalid direct format links (HTTP 403)
+ 'url': 'http://videolectures.net/russir2010_filippova_nlp/',
+ 'info_dict': {
+ 'id': '14891',
+ 'display_id': 'russir2010_filippova_nlp',
+ 'ext': 'flv',
+ 'title': 'NLP at Google',
+ 'description': 'md5:fc7a6d9bf0302d7cc0e53f7ca23747b3',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'timestamp': 1284375600,
+ 'upload_date': '20100913',
+ 'duration': 5352,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }, {
+ # event playlist
+ 'url': 'http://videolectures.net/deeplearning2015_montreal/',
+ 'info_dict': {
+ 'id': '23181',
+ 'title': 'Deep Learning Summer School, Montreal 2015',
+ 'description': 'md5:0533a85e4bd918df52a01f0e1ebe87b7',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'timestamp': 1438560000,
+ },
+ 'playlist_count': 30,
+ }, {
+ # multi part lecture
+ 'url': 'http://videolectures.net/mlss09uk_bishop_ibi/',
+ 'info_dict': {
+ 'id': '9737',
+ 'display_id': 'mlss09uk_bishop_ibi',
+ 'title': 'Introduction To Bayesian Inference',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'timestamp': 1251622800,
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '9737_part1',
+ 'display_id': 'mlss09uk_bishop_ibi_part1',
+ 'ext': 'wmv',
+ 'title': 'Introduction To Bayesian Inference (Part 1)',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'duration': 4622,
+ 'timestamp': 1251622800,
+ 'upload_date': '20090830',
+ },
+ }, {
+ 'info_dict': {
+ 'id': '9737_part2',
+ 'display_id': 'mlss09uk_bishop_ibi_part2',
+ 'ext': 'wmv',
+ 'title': 'Introduction To Bayesian Inference (Part 2)',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'duration': 5641,
+ 'timestamp': 1251622800,
+ 'upload_date': '20090830',
+ },
+ }],
+ 'playlist_count': 2,
+ }]
+
+ def _real_extract(self, url):
+ lecture_slug, explicit_part_id = re.match(self._VALID_URL, url).groups()
+
+ webpage = self._download_webpage(url, lecture_slug)
+
+ cfg = self._parse_json(self._search_regex(
+ [r'cfg\s*:\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*:\s*\(?\s*function',
+ r'cfg\s*:\s*({[^}]+})'],
+ webpage, 'cfg'), lecture_slug, js_to_json)
+
+ lecture_id = compat_str(cfg['obj_id'])
+
+ base_url = self._proto_relative_url(cfg['livepipe'], 'http:')
+
+ try:
+ lecture_data = self._download_json(
+ '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id),
+ lecture_id)['lecture'][0]
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ msg = self._parse_json(
+ e.cause.read().decode('utf-8'), lecture_id)
+ raise ExtractorError(msg['detail'], expected=True)
+ raise
+
+ lecture_info = {
+ 'id': lecture_id,
+ 'display_id': lecture_slug,
+ 'title': lecture_data['title'],
+ 'timestamp': parse_iso8601(lecture_data.get('time')),
+ 'description': lecture_data.get('description_wiki'),
+ 'thumbnail': lecture_data.get('thumb'),
+ }
+
+ playlist_entries = []
+ lecture_type = lecture_data.get('type')
+ parts = [compat_str(video) for video in cfg.get('videos', [])]
+ if parts:
+ multipart = len(parts) > 1
+
+ def extract_part(part_id):
+ smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id)
+ smil = self._download_smil(smil_url, lecture_id)
+ info = self._parse_smil(smil, smil_url, lecture_id)
+ self._sort_formats(info['formats'])
+ info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id)
+ info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id)
+ if multipart:
+ info['title'] += ' (Part %s)' % part_id
+ switch = smil.find('.//switch')
+ if switch is not None:
+ info['duration'] = parse_duration(switch.attrib.get('dur'))
+ item_info = lecture_info.copy()
+ item_info.update(info)
+ return item_info
+
+ if explicit_part_id or not multipart:
+ result = extract_part(explicit_part_id or parts[0])
+ else:
+ result = {
+ '_type': 'multi_video',
+ 'entries': [extract_part(part) for part in parts],
+ }
+ result.update(lecture_info)
+
+ # Immediately return the explicitly requested part or a non-event item
+ if explicit_part_id or lecture_type != 'evt':
+ return result
+
+ playlist_entries.append(result)
+
+ # It's probably a playlist
+ if not parts or lecture_type == 'evt':
+ playlist_webpage = self._download_webpage(
+ '%s/site/ajax/drilldown/?id=%s' % (base_url, lecture_id), lecture_id)
+ entries = [
+ self.url_result(compat_urlparse.urljoin(url, video_url), 'Viidea')
+ for _, video_url in re.findall(
+ r'<a[^>]+href=(["\'])(.+?)\1[^>]+id=["\']lec=\d+', playlist_webpage)]
+ playlist_entries.extend(entries)
+
+ playlist = self.playlist_result(playlist_entries, lecture_id)
+ playlist.update(lecture_info)
+ return playlist
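+ # Note (illustrative, not part of the original change): '_type': 'multi_video'
+ # marks the parts as sections of one logical lecture, while playlist_result()
+ # produces a plain '_type': 'playlist' of independent entries. A minimal
+ # multi_video result would look like:
+ #   {'_type': 'multi_video', 'id': '9737',
+ #    'title': 'Introduction To Bayesian Inference',
+ #    'entries': [part1_info, part2_info]}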
diff --git a/hypervideo_dl/extractor/viki.py b/hypervideo_dl/extractor/viki.py
new file mode 100644
index 0000000..2e9cbf1
--- /dev/null
+++ b/hypervideo_dl/extractor/viki.py
@@ -0,0 +1,433 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import hashlib
+import hmac
+import itertools
+import json
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+ parse_iso8601,
+ sanitized_Request,
+ std_headers,
+ try_get,
+)
+
+
+class VikiBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
+ _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
+ _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
+
+ _APP = '100005a'
+ _APP_VERSION = '6.0.0'
+ _APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
+
+ _GEO_BYPASS = False
+ _NETRC_MACHINE = 'viki'
+
+ _token = None
+
+ _ERRORS = {
+ 'geo': 'Sorry, this content is not available in your region.',
+ 'upcoming': 'Sorry, this content is not yet available.',
+ 'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
+ }
+
+ def _prepare_call(self, path, timestamp=None, post_data=None):
+ path += '?' if '?' not in path else '&'
+ if not timestamp:
+ timestamp = int(time.time())
+ query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
+ if self._token:
+ query += '&token=%s' % self._token
+ sig = hmac.new(
+ self._APP_SECRET.encode('ascii'),
+ query.encode('ascii'),
+ hashlib.sha1
+ ).hexdigest()
+ url = self._API_URL_TEMPLATE % (query, sig)
+ return sanitized_Request(
+ url, json.dumps(post_data).encode('utf-8')) if post_data else url
+
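+ # Illustrative sketch (assumed values, mirroring the logic above): the query
+ # string is signed with HMAC-SHA1 of the app secret over the exact bytes of
+ # the query, so any change, including parameter order, invalidates it:
+ #   query = '/v4/videos/1023585v/streams.json?app=100005a&t=1609459200&site=www.viki.com'
+ #   sig = hmac.new(self._APP_SECRET.encode('ascii'), query.encode('ascii'),
+ #                  hashlib.sha1).hexdigest()
+ #   url = 'https://api.viki.io%s&sig=%s' % (query, sig)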
+ def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
+ resp = self._download_json(
+ self._prepare_call(path, timestamp, post_data), video_id, note,
+ headers={'x-viki-app-ver': self._APP_VERSION})
+
+ error = resp.get('error')
+ if error:
+ if error == 'invalid timestamp':
+ resp = self._download_json(
+ self._prepare_call(path, int(resp['current_timestamp']), post_data),
+ video_id, '%s (retry)' % note)
+ error = resp.get('error')
+ if error:
+ self._raise_error(resp['error'])
+
+ return resp
+
+ def _raise_error(self, error):
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error),
+ expected=True)
+
+ def _check_errors(self, data):
+ for reason, status in (data.get('blocking') or {}).items():
+ if status and reason in self._ERRORS:
+ message = self._ERRORS[reason]
+ if reason == 'geo':
+ self.raise_geo_restricted(msg=message)
+ elif reason == 'paywall':
+ self.raise_login_required(message)
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, message), expected=True)
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_form = {
+ 'login_id': username,
+ 'password': password,
+ }
+
+ login = self._call_api(
+ 'sessions.json', None,
+ 'Logging in', post_data=login_form)
+
+ self._token = login.get('token')
+ if not self._token:
+ self.report_warning('Unable to get session token, login has probably failed')
+
+ @staticmethod
+ def dict_selection(dict_obj, preferred_key, allow_fallback=True):
+ if preferred_key in dict_obj:
+ return dict_obj.get(preferred_key)
+
+ if not allow_fallback:
+ return
+
+ filtered_dict = [v for v in dict_obj.values() if v]
+ return filtered_dict[0] if filtered_dict else None
+
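+ # Usage sketch (illustrative): given titles = {'en': 'Heirs', 'ko': '...'},
+ # dict_selection(titles, 'en') returns 'Heirs'. If the preferred key is
+ # missing it falls back to the first non-empty value, and it returns None
+ # instead when allow_fallback=False.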
+
+class VikiIE(VikiBaseIE):
+ IE_NAME = 'viki'
+ _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
+ _TESTS = [{
+ 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
+ 'info_dict': {
+ 'id': '1023585v',
+ 'ext': 'mp4',
+ 'title': 'Heirs - Episode 14',
+ 'uploader': 'SBS Contents Hub',
+ 'timestamp': 1385047627,
+ 'upload_date': '20131121',
+ 'age_limit': 13,
+ 'duration': 3570,
+ 'episode_number': 14,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ 'skip': 'Blocked in the US',
+ 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
+ }, {
+ # clip
+ 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
+ 'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
+ 'info_dict': {
+ 'id': '1067139v',
+ 'ext': 'mp4',
+ 'title': "'The Avengers: Age of Ultron' Press Conference",
+ 'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
+ 'duration': 352,
+ 'timestamp': 1430380829,
+ 'upload_date': '20150430',
+ 'uploader': 'Arirang TV',
+ 'like_count': int,
+ 'age_limit': 0,
+ },
+ 'skip': 'Sorry. There was an error loading this video',
+ }, {
+ 'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
+ 'info_dict': {
+ 'id': '1048879v',
+ 'ext': 'mp4',
+ 'title': 'Ankhon Dekhi',
+ 'duration': 6512,
+ 'timestamp': 1408532356,
+ 'upload_date': '20140820',
+ 'uploader': 'Spuul',
+ 'like_count': int,
+ 'age_limit': 13,
+ },
+ 'skip': 'Blocked in the US',
+ }, {
+ # episode
+ 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
+ 'md5': '0a53dc252e6e690feccd756861495a8c',
+ 'info_dict': {
+ 'id': '44699v',
+ 'ext': 'mp4',
+ 'title': 'Boys Over Flowers - Episode 1',
+ 'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
+ 'duration': 4172,
+ 'timestamp': 1270496524,
+ 'upload_date': '20100405',
+ 'uploader': 'group8',
+ 'like_count': int,
+ 'age_limit': 13,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
+ }, {
+ # youtube external
+ 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
+ 'md5': '63f8600c1da6f01b7640eee7eca4f1da',
+ 'info_dict': {
+ 'id': '50562v',
+ 'ext': 'webm',
+ 'title': 'Poor Nastya [COMPLETE] - Episode 1',
+ 'description': '',
+ 'duration': 606,
+ 'timestamp': 1274949505,
+ 'upload_date': '20101213',
+ 'uploader': 'ad14065n',
+ 'uploader_id': 'ad14065n',
+ 'like_count': int,
+ 'age_limit': 13,
+ },
+ 'skip': 'Page not found!',
+ }, {
+ 'url': 'http://www.viki.com/player/44699v',
+ 'only_matching': True,
+ }, {
+ # non-English description
+ 'url': 'http://www.viki.com/videos/158036v-love-in-magic',
+ 'md5': '41faaba0de90483fb4848952af7c7d0d',
+ 'info_dict': {
+ 'id': '158036v',
+ 'ext': 'mp4',
+ 'uploader': 'I Planet Entertainment',
+ 'upload_date': '20111122',
+ 'timestamp': 1321985454,
+ 'description': 'md5:44b1e46619df3a072294645c770cef36',
+ 'title': 'Love In Magic',
+ 'age_limit': 13,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ resp = self._download_json(
+ 'https://www.viki.com/api/videos/' + video_id,
+ video_id, 'Downloading video JSON', headers={
+ 'x-client-user-agent': std_headers['User-Agent'],
+ 'x-viki-app-ver': '3.0.0',
+ })
+ video = resp['video']
+
+ self._check_errors(video)
+
+ title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
+ episode_number = int_or_none(video.get('number'))
+ if not title:
+ title = 'Episode %d' % episode_number if video.get('type') == 'episode' and episode_number is not None else video.get('id') or video_id
+ container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {}
+ container_title = self.dict_selection(container_titles, 'en')
+ title = '%s - %s' % (container_title, title)
+
+ description = self.dict_selection(video.get('descriptions', {}), 'en')
+
+ like_count = int_or_none(try_get(video, lambda x: x['likes']['count']))
+
+ thumbnails = []
+ for thumbnail_id, thumbnail in (video.get('images') or {}).items():
+ thumbnails.append({
+ 'id': thumbnail_id,
+ 'url': thumbnail.get('url'),
+ })
+
+ subtitles = {}
+ for subtitle_lang in (video.get('subtitle_completions') or {}):
+ subtitles[subtitle_lang] = [{
+ 'ext': subtitles_format,
+ 'url': self._prepare_call(
+ 'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
+ } for subtitles_format in ('srt', 'vtt')]
+
+ result = {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': parse_iso8601(video.get('created_at')),
+ 'uploader': video.get('author'),
+ 'uploader_url': video.get('author_url'),
+ 'like_count': like_count,
+ 'age_limit': parse_age_limit(video.get('rating')),
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ 'episode_number': episode_number,
+ }
+
+ formats = []
+
+ def add_format(format_id, format_dict, protocol='http'):
+ # rtmps URLs do not seem to work
+ if protocol == 'rtmps':
+ return
+ format_url = format_dict.get('url')
+ if not format_url:
+ return
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
+ stream = qs.get('stream', [None])[0]
+ if stream:
+ format_url = base64.b64decode(stream).decode()
+ if format_id in ('m3u8', 'hls'):
+ m3u8_formats = self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native',
+ m3u8_id='m3u8-%s' % protocol, fatal=False)
+ # Despite CODECS metadata in m3u8, all video-only formats
+ # are actually video+audio
+ for f in m3u8_formats:
+ if '_drm/index_' in f['url']:
+ continue
+ if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
+ f['acodec'] = None
+ formats.append(f)
+ elif format_id in ('mpd', 'dash'):
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, 'mpd-%s' % protocol, fatal=False))
+ elif format_url.startswith('rtmp'):
+ mobj = re.search(
+ r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
+ format_url)
+ if not mobj:
+ return
+ formats.append({
+ 'format_id': 'rtmp-%s' % format_id,
+ 'ext': 'flv',
+ 'url': mobj.group('url'),
+ 'play_path': mobj.group('playpath'),
+ 'app': mobj.group('app'),
+ 'page_url': url,
+ })
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': '%s-%s' % (format_id, protocol),
+ 'height': int_or_none(self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None)),
+ })
+
+ for format_id, format_dict in (resp.get('streams') or {}).items():
+ add_format(format_id, format_dict)
+ if not formats:
+ streams = self._call_api(
+ 'videos/%s/streams.json' % video_id, video_id,
+ 'Downloading video streams JSON')
+
+ if 'external' in streams:
+ result.update({
+ '_type': 'url_transparent',
+ 'url': streams['external']['url'],
+ })
+ return result
+
+ for format_id, stream_dict in streams.items():
+ for protocol, format_dict in stream_dict.items():
+ add_format(format_id, format_dict, protocol)
+ self._sort_formats(formats)
+
+ result['formats'] = formats
+ return result
+
+
+class VikiChannelIE(VikiBaseIE):
+ IE_NAME = 'viki:channel'
+ _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE
+ _TESTS = [{
+ 'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
+ 'info_dict': {
+ 'id': '50c',
+ 'title': 'Boys Over Flowers',
+ 'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
+ },
+ 'playlist_mincount': 71,
+ }, {
+ 'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
+ 'info_dict': {
+ 'id': '1354c',
+ 'title': 'Poor Nastya [COMPLETE]',
+ 'description': 'md5:05bf5471385aa8b21c18ad450e350525',
+ },
+ 'playlist_count': 127,
+ 'skip': 'Page not found',
+ }, {
+ 'url': 'http://www.viki.com/news/24569c-showbiz-korea',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.viki.com/artists/2141c-shinee',
+ 'only_matching': True,
+ }]
+
+ _PER_PAGE = 25
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ channel = self._call_api(
+ 'containers/%s.json' % channel_id, channel_id,
+ 'Downloading channel JSON')
+
+ self._check_errors(channel)
+
+ title = self.dict_selection(channel['titles'], 'en')
+
+ description = self.dict_selection(channel['descriptions'], 'en')
+
+ entries = []
+ for video_type in ('episodes', 'clips', 'movies'):
+ for page_num in itertools.count(1):
+ page = self._call_api(
+ 'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
+ % (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
+ 'Downloading %s JSON page #%d' % (video_type, page_num))
+ for video in page['response']:
+ video_id = video['id']
+ entries.append(self.url_result(
+ 'https://www.viki.com/videos/%s' % video_id, 'Viki'))
+ if not page['pagination']['next']:
+ break
+
+ return self.playlist_result(entries, channel_id, title, description)
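+ # Note (illustrative): each video type is paged with itertools.count(1)
+ # until the API reports no next page; e.g. a response of the assumed shape
+ #   {'response': [{'id': '1023585v'}, ...], 'pagination': {'next': ''}}
+ # terminates the inner loop for that type.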
diff --git a/hypervideo_dl/extractor/vimeo.py b/hypervideo_dl/extractor/vimeo.py
new file mode 100644
index 0000000..6323219
--- /dev/null
+++ b/hypervideo_dl/extractor/vimeo.py
@@ -0,0 +1,1158 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import functools
+import re
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_kwargs,
+ compat_HTTPError,
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ get_element_by_class,
+ js_to_json,
+ int_or_none,
+ merge_dicts,
+ OnDemandPagedList,
+ parse_filesize,
+ parse_iso8601,
+ sanitized_Request,
+ smuggle_url,
+ std_headers,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ unsmuggle_url,
+ urlencode_postdata,
+ urljoin,
+ unescapeHTML,
+)
+
+
+class VimeoBaseInfoExtractor(InfoExtractor):
+ _NETRC_MACHINE = 'vimeo'
+ _LOGIN_REQUIRED = False
+ _LOGIN_URL = 'https://vimeo.com/log_in'
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ if self._LOGIN_REQUIRED:
+ raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+ return
+ webpage = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+ token, vuid = self._extract_xsrft_and_vuid(webpage)
+ data = {
+ 'action': 'login',
+ 'email': username,
+ 'password': password,
+ 'service': 'vimeo',
+ 'token': token,
+ }
+ self._set_vimeo_cookie('vuid', vuid)
+ try:
+ self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(data), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': self._LOGIN_URL,
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 418:
+ raise ExtractorError(
+ 'Unable to log in: bad username or password',
+ expected=True)
+ raise ExtractorError('Unable to log in')
+
+ def _get_video_password(self):
+ password = self._downloader.params.get('videopassword')
+ if password is None:
+ raise ExtractorError(
+ 'This video is protected by a password, use the --video-password option',
+ expected=True)
+ return password
+
+ def _verify_video_password(self, url, video_id, password, token, vuid):
+ if url.startswith('http://'):
+ # Vimeo only supports HTTPS now, but the user may have given an HTTP URL
+ url = url.replace('http://', 'https://')
+ self._set_vimeo_cookie('vuid', vuid)
+ return self._download_webpage(
+ url + '/password', video_id, 'Verifying the password',
+ 'Wrong password', data=urlencode_postdata({
+ 'password': password,
+ 'token': token,
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': url,
+ })
+
+ def _extract_xsrft_and_vuid(self, webpage):
+ xsrft = self._search_regex(
+ r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
+ webpage, 'login token', group='xsrft')
+ vuid = self._search_regex(
+ r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
+ webpage, 'vuid', group='vuid')
+ return xsrft, vuid
+
+ def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
+ vimeo_config = self._search_regex(
+ r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
+ webpage, 'vimeo config', *args, **compat_kwargs(kwargs))
+ if vimeo_config:
+ return self._parse_json(vimeo_config, video_id)
+
+ def _set_vimeo_cookie(self, name, value):
+ self._set_cookie('vimeo.com', name, value)
+
+ def _vimeo_sort_formats(self, formats):
+ # Bitrates are completely broken. A single m3u8 may contain entries in kbps and bps
+ # at the same time without actual units specified. This leads to wrong sorting.
+ self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))
+
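+ # Illustrative example (not part of the original change): with the
+ # field_preference above, a format {'height': 1080, 'tbr': 800} outranks
+ # {'height': 720, 'tbr': 2000000} because 'height' is compared before
+ # 'tbr', which sidesteps the kbps-vs-bps ambiguity.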
+ def _parse_config(self, config, video_id):
+ video_data = config['video']
+ video_title = video_data['title']
+ live_event = video_data.get('live_event') or {}
+ is_live = live_event.get('status') == 'started'
+ request = config.get('request') or {}
+
+ formats = []
+ config_files = video_data.get('files') or request.get('files') or {}
+ for f in (config_files.get('progressive') or []):
+ video_url = f.get('url')
+ if not video_url:
+ continue
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http-%s' % f.get('quality'),
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'fps': int_or_none(f.get('fps')),
+ 'tbr': int_or_none(f.get('bitrate')),
+ })
+
+ # TODO: fix handling of 308 status code returned for live archive manifest requests
+ sep_pattern = r'/sep/video/'
+ for files_type in ('hls', 'dash'):
+ for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
+ manifest_url = cdn_data.get('url')
+ if not manifest_url:
+ continue
+ format_id = '%s-%s' % (files_type, cdn_name)
+ sep_manifest_urls = []
+ if re.search(sep_pattern, manifest_url):
+ for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
+ sep_manifest_urls.append((format_id + suffix, re.sub(
+ sep_pattern, '/%s/' % repl, manifest_url)))
+ else:
+ sep_manifest_urls = [(format_id, manifest_url)]
+ for f_id, m_url in sep_manifest_urls:
+ if files_type == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ m_url, video_id, 'mp4',
+ 'm3u8' if is_live else 'm3u8_native', m3u8_id=f_id,
+ note='Downloading %s m3u8 information' % cdn_name,
+ fatal=False))
+ elif files_type == 'dash':
+ if 'json=1' in m_url:
+ real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url')
+ if real_m_url:
+ m_url = real_m_url
+ mpd_formats = self._extract_mpd_formats(
+ m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
+ 'Downloading %s MPD information' % cdn_name,
+ fatal=False)
+ formats.extend(mpd_formats)
+
+ live_archive = live_event.get('archive') or {}
+ live_archive_source_url = live_archive.get('source_url')
+ if live_archive_source_url and live_archive.get('status') == 'done':
+ formats.append({
+ 'format_id': 'live-archive-source',
+ 'url': live_archive_source_url,
+ 'preference': 1,
+ })
+
+ for f in formats:
+ if f.get('vcodec') == 'none':
+ f['preference'] = -50
+ elif f.get('acodec') == 'none':
+ f['preference'] = -40
+
+ subtitles = {}
+ for tt in (request.get('text_tracks') or []):
+ subtitles[tt['lang']] = [{
+ 'ext': 'vtt',
+ 'url': urljoin('https://vimeo.com', tt['url']),
+ }]
+
+ thumbnails = []
+ if not is_live:
+ for key, thumb in (video_data.get('thumbs') or {}).items():
+ thumbnails.append({
+ 'id': key,
+ 'width': int_or_none(key),
+ 'url': thumb,
+ })
+ thumbnail = video_data.get('thumbnail')
+ if thumbnail:
+ thumbnails.append({
+ 'url': thumbnail,
+ })
+
+ owner = video_data.get('owner') or {}
+ video_uploader_url = owner.get('url')
+
+ return {
+ 'id': str_or_none(video_data.get('id')) or video_id,
+ 'title': self._live_title(video_title) if is_live else video_title,
+ 'uploader': owner.get('name'),
+ 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
+ 'uploader_url': video_uploader_url,
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(video_data.get('duration')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ }
+
+ def _extract_original_format(self, url, video_id, unlisted_hash=None):
+ query = {'action': 'load_download_config'}
+ if unlisted_hash:
+ query['unlisted_hash'] = unlisted_hash
+ download_data = self._download_json(
+ url, video_id, fatal=False, query=query,
+ headers={'X-Requested-With': 'XMLHttpRequest'})
+ if download_data:
+ source_file = download_data.get('source_file')
+ if isinstance(source_file, dict):
+ download_url = source_file.get('download_url')
+ if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
+ source_name = source_file.get('public_name', 'Original')
+ if self._is_valid_url(download_url, video_id, '%s video' % source_name):
+ ext = (try_get(
+ source_file, lambda x: x['extension'],
+ compat_str) or determine_ext(
+ download_url, None) or 'mp4').lower()
+ return {
+ 'url': download_url,
+ 'ext': ext,
+ 'width': int_or_none(source_file.get('width')),
+ 'height': int_or_none(source_file.get('height')),
+ 'filesize': parse_filesize(source_file.get('size')),
+ 'format_id': source_name,
+ 'preference': 1,
+ }
+
+
+class VimeoIE(VimeoBaseInfoExtractor):
+ """Information extractor for vimeo.com."""
+
+ # _VALID_URL matches Vimeo URLs
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:
+ www|
+ player
+ )
+ \.
+ )?
+ vimeo(?:pro)?\.com/
+ (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
+ (?:.*?/)?
+ (?:
+ (?:
+ play_redirect_hls|
+ moogaloop\.swf)\?clip_id=
+ )?
+ (?:videos?/)?
+ (?P<id>[0-9]+)
+ (?:/(?P<unlisted_hash>[\da-f]{10}))?
+ /?(?:[?&].*)?(?:[#].*)?$
+ '''
+ IE_NAME = 'vimeo'
+ _TESTS = [
+ {
+ 'url': 'http://vimeo.com/56015672#at=0',
+ 'md5': '8879b6cc097e987f02484baf890129e5',
+ 'info_dict': {
+ 'id': '56015672',
+ 'ext': 'mp4',
+ 'title': "hypervideo test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
+ 'description': 'md5:2d3305bad981a06ff79f027f19865021',
+ 'timestamp': 1355990239,
+ 'upload_date': '20121220',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
+ 'uploader_id': 'user7108434',
+ 'uploader': 'Filippo Valsorda',
+ 'duration': 10,
+ 'license': 'by-sa',
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
+ },
+ {
+ 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
+ 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82',
+ 'note': 'Vimeo Pro video (#1197)',
+ 'info_dict': {
+ 'id': '68093876',
+ 'ext': 'mp4',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
+ 'uploader_id': 'openstreetmapus',
+ 'uploader': 'OpenStreetMap US',
+ 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
+ 'description': 'md5:2c362968038d4499f4d79f88458590c1',
+ 'duration': 1595,
+ 'upload_date': '20130610',
+ 'timestamp': 1370893156,
+ 'license': 'by',
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
+ },
+ {
+ 'url': 'http://player.vimeo.com/video/54469442',
+ 'md5': '619b811a4417aa4abe78dc653becf511',
+ 'note': 'Videos that embed the url in the player page',
+ 'info_dict': {
+ 'id': '54469442',
+ 'ext': 'mp4',
+ 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
+ 'uploader': 'Business of Software',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/businessofsoftware',
+ 'uploader_id': 'businessofsoftware',
+ 'duration': 3610,
+ 'description': None,
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ },
+ {
+ 'url': 'http://vimeo.com/68375962',
+ 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
+ 'note': 'Video protected with password',
+ 'info_dict': {
+ 'id': '68375962',
+ 'ext': 'mp4',
+ 'title': 'hypervideo password protected test video',
+ 'timestamp': 1371200155,
+ 'upload_date': '20130614',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
+ 'uploader_id': 'user18948128',
+ 'uploader': 'Jaime Marquínez Ferrándiz',
+ 'duration': 10,
+ 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f',
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ 'videopassword': 'hypervideo',
+ },
+ },
+ {
+ 'url': 'http://vimeo.com/channels/keypeele/75629013',
+ 'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
+ 'info_dict': {
+ 'id': '75629013',
+ 'ext': 'mp4',
+ 'title': 'Key & Peele: Terrorist Interrogation',
+ 'description': 'md5:8678b246399b070816b12313e8b4eb5c',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
+ 'uploader_id': 'atencio',
+ 'uploader': 'Peter Atencio',
+ 'channel_id': 'keypeele',
+ 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/keypeele',
+ 'timestamp': 1380339469,
+ 'upload_date': '20130928',
+ 'duration': 187,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ },
+ {
+ 'url': 'http://vimeo.com/76979871',
+ 'note': 'Video with subtitles',
+ 'info_dict': {
+ 'id': '76979871',
+ 'ext': 'mp4',
+ 'title': 'The New Vimeo Player (You Know, For Videos)',
+ 'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
+ 'timestamp': 1381846109,
+ 'upload_date': '20131015',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
+ 'uploader_id': 'staff',
+ 'uploader': 'Vimeo Staff',
+ 'duration': 62,
+ 'subtitles': {
+ 'de': [{'ext': 'vtt'}],
+ 'en': [{'ext': 'vtt'}],
+ 'es': [{'ext': 'vtt'}],
+ 'fr': [{'ext': 'vtt'}],
+ },
+ }
+ },
+ {
+ # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
+ 'url': 'https://player.vimeo.com/video/98044508',
+ 'note': 'The js code contains assignments to the same variable as the config',
+ 'info_dict': {
+ 'id': '98044508',
+ 'ext': 'mp4',
+ 'title': 'Pier Solar OUYA Official Trailer',
+ 'uploader': 'Tulio Gonçalves',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user28849593',
+ 'uploader_id': 'user28849593',
+ },
+ },
+ {
+ # contains original format
+ 'url': 'https://vimeo.com/33951933',
+ 'md5': '53c688fa95a55bf4b7293d37a89c5c53',
+ 'info_dict': {
+ 'id': '33951933',
+ 'ext': 'mp4',
+ 'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
+ 'uploader': 'The DMCI',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
+ 'uploader_id': 'dmci',
+ 'timestamp': 1324343742,
+ 'upload_date': '20111220',
+ 'description': 'md5:ae23671e82d05415868f7ad1aec21147',
+ },
+ },
+ {
+ # only available via https://vimeo.com/channels/tributes/6213729 and
+ # not via https://vimeo.com/6213729
+ 'url': 'https://vimeo.com/channels/tributes/6213729',
+ 'info_dict': {
+ 'id': '6213729',
+ 'ext': 'mp4',
+ 'title': 'Vimeo Tribute: The Shining',
+ 'uploader': 'Casey Donahue',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
+ 'uploader_id': 'caseydonahue',
+ 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/tributes',
+ 'channel_id': 'tributes',
+ 'timestamp': 1250886430,
+ 'upload_date': '20090821',
+ 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ },
+ {
+ # redirects to ondemand extractor and should be passed through it
+ # for successful extraction
+ 'url': 'https://vimeo.com/73445910',
+ 'info_dict': {
+ 'id': '73445910',
+ 'ext': 'mp4',
+ 'title': 'The Reluctant Revolutionary',
+ 'uploader': '10Ft Films',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/tenfootfilms',
+ 'uploader_id': 'tenfootfilms',
+ 'description': 'md5:0fa704e05b04f91f40b7f3ca2e801384',
+ 'upload_date': '20130830',
+ 'timestamp': 1377853339,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ 'skip': 'this page is no longer available.',
+ },
+ {
+ 'url': 'http://player.vimeo.com/video/68375962',
+ 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
+ 'info_dict': {
+ 'id': '68375962',
+ 'ext': 'mp4',
+ 'title': 'hypervideo password protected test video',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
+ 'uploader_id': 'user18948128',
+ 'uploader': 'Jaime Marquínez Ferrándiz',
+ 'duration': 10,
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ 'videopassword': 'hypervideo',
+ },
+ },
+ {
+ 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://vimeo.com/109815029',
+ 'note': 'Video not completely processed, "failed" seed status',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://vimeo.com/album/2632481/video/79010983',
+ 'only_matching': True,
+ },
+ {
+ # source file returns 403: Forbidden
+ 'url': 'https://vimeo.com/7809605',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://vimeo.com/160743502/abd0e13fb4',
+ 'only_matching': True,
+ },
+ {
+ # requires passing unlisted_hash (a52724358e) to the load_download_config request
+ 'url': 'https://vimeo.com/392479337/a52724358e',
+ 'only_matching': True,
+ }
+ # https://gettingthingsdone.com/workflowmap/
+ # vimeo embed with check-password page protected by Referer header
+ ]
+
+ @staticmethod
+ def _smuggle_referrer(url, referrer_url):
+ return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
+
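+ # Usage sketch (illustrative): the smuggled Referer survives the round trip
+ # through the URL and is recovered in _real_extract:
+ #   url = VimeoIE._smuggle_referrer(
+ #       'https://player.vimeo.com/video/56015672', 'https://example.com/page')
+ #   url, data = unsmuggle_url(url, {})
+ #   # data == {'http_headers': {'Referer': 'https://example.com/page'}}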
+ @staticmethod
+ def _extract_urls(url, webpage):
+ urls = []
+ # Look for embedded (iframe) Vimeo player
+ for mobj in re.finditer(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
+ webpage):
+ urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
+ PLAIN_EMBED_RE = (
+ # Look for embedded (swf embed) Vimeo player
+ r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
+ # Look more for non-standard embedded Vimeo player
+ r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
+ )
+ for embed_re in PLAIN_EMBED_RE:
+ for mobj in re.finditer(embed_re, webpage):
+ urls.append(mobj.group('url'))
+ return urls
+
+ @staticmethod
+ def _extract_url(url, webpage):
+ urls = VimeoIE._extract_urls(url, webpage)
+ return urls[0] if urls else None
+
+ def _verify_player_video_password(self, url, video_id, headers):
+ password = self._get_video_password()
+ data = urlencode_postdata({
+ 'password': base64.b64encode(password.encode()),
+ })
+ headers = merge_dicts(headers, {
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ checked = self._download_json(
+ url + '/check-password', video_id,
+ 'Verifying the password', data=data, headers=headers)
+ if checked is False:
+ raise ExtractorError('Wrong video password', expected=True)
+ return checked
+
+ def _real_initialize(self):
+ self._login()
+
+ def _extract_from_api(self, video_id, unlisted_hash=None):
+ token = self._download_json(
+ 'https://vimeo.com/_rv/jwt', video_id, headers={
+ 'X-Requested-With': 'XMLHttpRequest'
+ })['token']
+ api_url = 'https://api.vimeo.com/videos/' + video_id
+ if unlisted_hash:
+ api_url += ':' + unlisted_hash
+ video = self._download_json(
+ api_url, video_id, headers={
+ 'Authorization': 'jwt ' + token,
+ }, query={
+ 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
+ })
+ info = self._parse_config(self._download_json(
+ video['config_url'], video_id), video_id)
+ self._vimeo_sort_formats(info['formats'])
+ get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
+ info.update({
+ 'description': video.get('description'),
+ 'license': video.get('license'),
+ 'release_timestamp': get_timestamp('release'),
+ 'timestamp': get_timestamp('created'),
+ 'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])),
+ })
+ connections = try_get(
+ video, lambda x: x['metadata']['connections'], dict) or {}
+ for k in ('comment', 'like'):
+ info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
+ return info
+
+ def _real_extract(self, url):
+ url, data = unsmuggle_url(url, {})
+ headers = std_headers.copy()
+ if 'http_headers' in data:
+ headers.update(data['http_headers'])
+ if 'Referer' not in headers:
+ headers['Referer'] = url
+
+ mobj = re.match(self._VALID_URL, url).groupdict()
+ video_id, unlisted_hash = mobj['id'], mobj.get('unlisted_hash')
+ if unlisted_hash:
+ return self._extract_from_api(video_id, unlisted_hash)
+
+ orig_url = url
+ is_pro = 'vimeopro.com/' in url
+ if is_pro:
+ # Some videos require portfolio_id to be present in the player URL
+ # https://github.com/ytdl-org/youtube-dl/issues/20070
+ url = self._extract_url(url, self._download_webpage(url, video_id))
+ if not url:
+ url = 'https://vimeo.com/' + video_id
+ elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
+ url = 'https://vimeo.com/' + video_id
+
+ try:
+ # Retrieve video webpage to extract further information
+ webpage, urlh = self._download_webpage_handle(
+ url, video_id, headers=headers)
+ redirect_url = urlh.geturl()
+ except ExtractorError as ee:
+ if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+ errmsg = ee.cause.read()
+ if b'Because of its privacy settings, this video cannot be played here' in errmsg:
+ raise ExtractorError(
+ 'Cannot download embed-only video without embedding '
+ 'URL. Please call hypervideo with the URL of the page '
+ 'that embeds this video.',
+ expected=True)
+ raise
+
+ if '//player.vimeo.com/video/' in url:
+ config = self._parse_json(self._search_regex(
+ r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
+ if config.get('view') == 4:
+ config = self._verify_player_video_password(
+ redirect_url, video_id, headers)
+ info = self._parse_config(config, video_id)
+ self._vimeo_sort_formats(info['formats'])
+ return info
+
+ if re.search(r'<form[^>]+?id="pw_form"', webpage):
+ video_password = self._get_video_password()
+ token, vuid = self._extract_xsrft_and_vuid(webpage)
+ webpage = self._verify_video_password(
+ redirect_url, video_id, video_password, token, vuid)
+
+ vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
+ if vimeo_config:
+ seed_status = vimeo_config.get('seed_status') or {}
+ if seed_status.get('state') == 'failed':
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, seed_status['title']),
+ expected=True)
+
+ cc_license = None
+ timestamp = None
+ video_description = None
+ info_dict = {}
+
+ channel_id = self._search_regex(
+ r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
+ if channel_id:
+ config_url = self._html_search_regex(
+ r'\bdata-config-url="([^"]+)"', webpage, 'config URL')
+ video_description = clean_html(get_element_by_class('description', webpage))
+ info_dict.update({
+ 'channel_id': channel_id,
+ 'channel_url': 'https://vimeo.com/channels/' + channel_id,
+ })
+ else:
+ page_config = self._parse_json(self._search_regex(
+ r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
+ webpage, 'page config', default='{}'), video_id, fatal=False)
+ if not page_config:
+ return self._extract_from_api(video_id)
+ config_url = page_config['player']['config_url']
+ cc_license = page_config.get('cc_license')
+ clip = page_config.get('clip') or {}
+ timestamp = clip.get('uploaded_on')
+ video_description = clean_html(
+ clip.get('description') or page_config.get('description_html_escaped'))
+ config = self._download_json(config_url, video_id)
+ video = config.get('video') or {}
+ vod = video.get('vod') or {}
+
+ def is_rented():
+ if '>You rented this title.<' in webpage:
+ return True
+ if try_get(config, lambda x: x['user']['purchased']):
+ return True
+ for purchase_option in (vod.get('purchase_options') or []):
+ if purchase_option.get('purchased'):
+ return True
+ label = purchase_option.get('label_string')
+ if label and (label.startswith('You rented this') or label.endswith(' remaining')):
+ return True
+ return False
+
+ if is_rented() and vod.get('is_trailer'):
+ feature_id = vod.get('feature_id')
+ if feature_id and not data.get('force_feature_id', False):
+ return self.url_result(smuggle_url(
+ 'https://player.vimeo.com/player/%s' % feature_id,
+ {'force_feature_id': True}), 'Vimeo')
+
+ if not video_description:
+ video_description = self._html_search_meta(
+ ['description', 'og:description', 'twitter:description'],
+ webpage, default=None)
+ if not video_description and is_pro:
+ orig_webpage = self._download_webpage(
+ orig_url, video_id,
+ note='Downloading webpage for description',
+ fatal=False)
+ if orig_webpage:
+ video_description = self._html_search_meta(
+ 'description', orig_webpage, default=None)
+ if not video_description:
+ self._downloader.report_warning('Cannot find video description')
+
+ if not timestamp:
+ timestamp = self._search_regex(
+ r'<time[^>]+datetime="([^"]+)"', webpage,
+ 'timestamp', default=None)
+
+ formats = []
+
+ source_format = self._extract_original_format(
+ 'https://vimeo.com/' + video_id, video_id, video.get('unlisted_hash'))
+ if source_format:
+ formats.append(source_format)
+
+ info_dict_config = self._parse_config(config, video_id)
+ formats.extend(info_dict_config['formats'])
+ self._vimeo_sort_formats(formats)
+
+ json_ld = self._search_json_ld(webpage, video_id, default={})
+
+ if not cc_license:
+ cc_license = self._search_regex(
+ r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
+ webpage, 'license', default=None, group='license')
+
+ info_dict.update({
+ 'formats': formats,
+ 'timestamp': unified_timestamp(timestamp),
+ 'description': video_description,
+ 'webpage_url': url,
+ 'license': cc_license,
+ })
+
+ return merge_dicts(info_dict, info_dict_config, json_ld)
+
+
+class VimeoOndemandIE(VimeoIE):
+ IE_NAME = 'vimeo:ondemand'
+ _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?:[^/]+/)?(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # ondemand video not available via https://vimeo.com/id
+ 'url': 'https://vimeo.com/ondemand/20704',
+ 'md5': 'c424deda8c7f73c1dfb3edd7630e2f35',
+ 'info_dict': {
+ 'id': '105442900',
+ 'ext': 'mp4',
+ 'title': 'המעבדה - במאי יותם פלדמן',
+ 'uploader': 'גם סרטים',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms',
+ 'uploader_id': 'gumfilms',
+ 'description': 'md5:4c027c965e439de4baab621e48b60791',
+ 'upload_date': '20140906',
+ 'timestamp': 1410032453,
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }, {
+ # requires Referer to be passed along with og:video:url
+ 'url': 'https://vimeo.com/ondemand/36938/126682985',
+ 'info_dict': {
+ 'id': '126584684',
+ 'ext': 'mp4',
+ 'title': 'Rävlock, rätt läte på rätt plats',
+ 'uploader': 'Lindroth & Norin',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/lindrothnorin',
+ 'uploader_id': 'lindrothnorin',
+ 'description': 'md5:c3c46a90529612c8279fb6af803fc0df',
+ 'upload_date': '20150502',
+ 'timestamp': 1430586422,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }, {
+ 'url': 'https://vimeo.com/ondemand/nazmaalik',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vimeo.com/ondemand/141692381',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vimeo.com/ondemand/thelastcolony/150274832',
+ 'only_matching': True,
+ }]
+
+
+class VimeoChannelIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vimeo:channel'
+ _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
+ _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
+ _TITLE = None
+ _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/channels/tributes',
+ 'info_dict': {
+ 'id': 'tributes',
+ 'title': 'Vimeo Tributes',
+ },
+ 'playlist_mincount': 25,
+ }]
+ _BASE_URL_TEMPL = 'https://vimeo.com/channels/%s'
+
+ def _page_url(self, base_url, pagenum):
+ return '%s/videos/page:%d/' % (base_url, pagenum)
+
+ def _extract_list_title(self, webpage):
+ return self._TITLE or self._html_search_regex(
+ self._TITLE_RE, webpage, 'list title', fatal=False)
+
+ def _title_and_entries(self, list_id, base_url):
+ for pagenum in itertools.count(1):
+ page_url = self._page_url(base_url, pagenum)
+ webpage = self._download_webpage(
+ page_url, list_id,
+ 'Downloading page %s' % pagenum)
+
+ if pagenum == 1:
+ yield self._extract_list_title(webpage)
+
+ # Try extracting href first since not all videos are available via
+ # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
+ clips = re.findall(
+ r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage)
+ if clips:
+ for video_id, video_url, video_title in clips:
+ yield self.url_result(
+ compat_urlparse.urljoin(base_url, video_url),
+ VimeoIE.ie_key(), video_id=video_id, video_title=video_title)
+ # More relaxed fallback
+ else:
+ for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
+ yield self.url_result(
+ 'https://vimeo.com/%s' % video_id,
+ VimeoIE.ie_key(), video_id=video_id)
+
+ if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
+ break
+
+ def _extract_videos(self, list_id, base_url):
+ title_and_entries = self._title_and_entries(list_id, base_url)
+ list_title = next(title_and_entries)
+ return self.playlist_result(title_and_entries, list_id, list_title)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ return self._extract_videos(channel_id, self._BASE_URL_TEMPL % channel_id)
+
+
+class VimeoUserIE(VimeoChannelIE):
+ IE_NAME = 'vimeo:user'
+ _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<id>[^/]+)(?:/videos|[#?]|$)'
+ _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/nkistudio/videos',
+ 'info_dict': {
+ 'title': 'Nki',
+ 'id': 'nkistudio',
+ },
+ 'playlist_mincount': 66,
+ }]
+ _BASE_URL_TEMPL = 'https://vimeo.com/%s'
+
+
+class VimeoAlbumIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vimeo:album'
+ _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
+ _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/album/2632481',
+ 'info_dict': {
+ 'id': '2632481',
+ 'title': 'Staff Favorites: November 2013',
+ },
+ 'playlist_mincount': 13,
+ }, {
+ 'note': 'Password-protected album',
+ 'url': 'https://vimeo.com/album/3253534',
+ 'info_dict': {
+ 'title': 'test',
+ 'id': '3253534',
+ },
+ 'playlist_count': 1,
+ 'params': {
+ 'videopassword': 'hypervideo',
+ }
+ }]
+ _PAGE_SIZE = 100
+
+ def _fetch_page(self, album_id, authorization, hashed_pass, page):
+ api_page = page + 1
+ query = {
+ 'fields': 'link,uri',
+ 'page': api_page,
+ 'per_page': self._PAGE_SIZE,
+ }
+ if hashed_pass:
+ query['_hashed_pass'] = hashed_pass
+ try:
+ videos = self._download_json(
+ 'https://api.vimeo.com/albums/%s/videos' % album_id,
+ album_id, 'Downloading page %d' % api_page, query=query, headers={
+ 'Authorization': 'jwt ' + authorization,
+ })['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ return
+ raise
+ for video in videos:
+ link = video.get('link')
+ if not link:
+ continue
+ uri = video.get('uri')
+ video_id = self._search_regex(r'/videos/(\d+)', uri, 'video_id', default=None) if uri else None
+ yield self.url_result(link, VimeoIE.ie_key(), video_id)
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+ viewer = self._download_json(
+ 'https://vimeo.com/_rv/viewer', album_id, fatal=False)
+ if not viewer:
+ webpage = self._download_webpage(url, album_id)
+ viewer = self._parse_json(self._search_regex(
+ r'bootstrap_data\s*=\s*({.+?})</script>',
+ webpage, 'bootstrap data'), album_id)['viewer']
+ jwt = viewer['jwt']
+ album = self._download_json(
+ 'https://api.vimeo.com/albums/' + album_id,
+ album_id, headers={'Authorization': 'jwt ' + jwt},
+ query={'fields': 'description,name,privacy'})
+ hashed_pass = None
+ if try_get(album, lambda x: x['privacy']['view']) == 'password':
+ password = self._downloader.params.get('videopassword')
+ if not password:
+ raise ExtractorError(
+ 'This album is protected by a password, use the --video-password option',
+ expected=True)
+ self._set_vimeo_cookie('vuid', viewer['vuid'])
+ try:
+ hashed_pass = self._download_json(
+ 'https://vimeo.com/showcase/%s/auth' % album_id,
+ album_id, 'Verifying the password', data=urlencode_postdata({
+ 'password': password,
+ 'token': viewer['xsrft'],
+ }), headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ })['hashed_pass']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ raise ExtractorError('Wrong password', expected=True)
+ raise
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, album_id, jwt, hashed_pass), self._PAGE_SIZE)
+ return self.playlist_result(
+ entries, album_id, album.get('name'), album.get('description'))
+
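+ # Note (illustrative): OnDemandPagedList calls _fetch_page lazily, so only
+ # the pages actually consumed (e.g. with --playlist-items 1-5) are
+ # downloaded rather than every API page of a large album.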
+
+class VimeoGroupsIE(VimeoChannelIE):
+ IE_NAME = 'vimeo:group'
+ _VALID_URL = r'https://vimeo\.com/groups/(?P<id>[^/]+)(?:/(?!videos?/\d+)|$)'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/groups/kattykay',
+ 'info_dict': {
+ 'id': 'kattykay',
+ 'title': 'Katty Kay',
+ },
+ 'playlist_mincount': 27,
+ }]
+ _BASE_URL_TEMPL = 'https://vimeo.com/groups/%s'
+
+
+class VimeoReviewIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vimeo:review'
+ IE_DESC = 'Review pages on vimeo'
+ _VALID_URL = r'(?P<url>https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)/[0-9a-f]{10})'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
+ 'md5': 'c507a72f780cacc12b2248bb4006d253',
+ 'info_dict': {
+ 'id': '75524534',
+ 'ext': 'mp4',
+ 'title': "DICK HARDWICK 'Comedian'",
+ 'uploader': 'Richard Hardwick',
+ 'uploader_id': 'user21297594',
+ 'description': "Comedian Dick Hardwick's five minute demo filmed in front of a live theater audience.\nEdit by Doug Mattocks",
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }, {
+ 'note': 'video player needs Referer',
+ 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
+ 'md5': '6295fdab8f4bf6a002d058b2c6dce276',
+ 'info_dict': {
+ 'id': '91613211',
+ 'ext': 'mp4',
+ 'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn',
+ 'uploader': 'DevWeek Events',
+ 'duration': 2773,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader_id': 'user22258446',
+ },
+ 'skip': 'video gone',
+ }, {
+ 'note': 'Password protected',
+ 'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
+ 'info_dict': {
+ 'id': '138823582',
+ 'ext': 'mp4',
+ 'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1',
+ 'uploader': 'TMB',
+ 'uploader_id': 'user37284429',
+ },
+ 'params': {
+ 'videopassword': 'holygrail',
+ },
+ 'skip': 'video gone',
+ }]
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ page_url, video_id = re.match(self._VALID_URL, url).groups()
+ data = self._download_json(
+ page_url.replace('/review/', '/review/data/'), video_id)
+ if data.get('isLocked') is True:
+ video_password = self._get_video_password()
+ viewer = self._download_json(
+ 'https://vimeo.com/_rv/viewer', video_id)
+ webpage = self._verify_video_password(
+ 'https://vimeo.com/' + video_id, video_id,
+ video_password, viewer['xsrft'], viewer['vuid'])
+ clip_page_config = self._parse_json(self._search_regex(
+ r'window\.vimeo\.clip_page_config\s*=\s*({.+?});',
+ webpage, 'clip page config'), video_id)
+ config_url = clip_page_config['player']['config_url']
+ clip_data = clip_page_config.get('clip') or {}
+ else:
+ clip_data = data['clipData']
+ config_url = clip_data['configUrl']
+ config = self._download_json(config_url, video_id)
+ info_dict = self._parse_config(config, video_id)
+ source_format = self._extract_original_format(
+ page_url + '/action', video_id)
+ if source_format:
+ info_dict['formats'].append(source_format)
+ self._vimeo_sort_formats(info_dict['formats'])
+ info_dict['description'] = clean_html(clip_data.get('description'))
+ return info_dict
+
+
+class VimeoWatchLaterIE(VimeoChannelIE):
+ IE_NAME = 'vimeo:watchlater'
+ IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
+ _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
+ _TITLE = 'Watch Later'
+ _LOGIN_REQUIRED = True
+ _TESTS = [{
+ 'url': 'https://vimeo.com/watchlater',
+ 'only_matching': True,
+ }]
+
+ def _real_initialize(self):
+ self._login()
+
+ def _page_url(self, base_url, pagenum):
+ url = '%s/page:%d/' % (base_url, pagenum)
+ request = sanitized_Request(url)
+ # Set the header to get a partial HTML page with the IDs;
+ # the normal page doesn't contain them.
+ request.add_header('X-Requested-With', 'XMLHttpRequest')
+ return request
+
+ def _real_extract(self, url):
+ return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')
+
+
+class VimeoLikesIE(VimeoChannelIE):
+ _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)'
+ IE_NAME = 'vimeo:likes'
+ IE_DESC = 'Vimeo user likes'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/user755559/likes/',
+ 'playlist_mincount': 293,
+ 'info_dict': {
+ 'id': 'user755559',
+ 'title': 'urza’s Likes',
+ },
+ }, {
+ 'url': 'https://vimeo.com/stormlapse/likes',
+ 'only_matching': True,
+ }]
+
+ def _page_url(self, base_url, pagenum):
+ return '%s/page:%d/' % (base_url, pagenum)
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id)
+
+
+class VHXEmbedIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vhx:embed'
+ _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage)
+ return unescapeHTML(mobj.group(1)) if mobj else None
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ config_url = self._parse_json(self._search_regex(
+ r'window\.OTTData\s*=\s*({.+})', webpage,
+ 'ott data'), video_id, js_to_json)['config_url']
+ config = self._download_json(config_url, video_id)
+ info = self._parse_config(config, video_id)
+ info['id'] = video_id
+ self._vimeo_sort_formats(info['formats'])
+ return info
diff --git a/hypervideo_dl/extractor/vimple.py b/hypervideo_dl/extractor/vimple.py
new file mode 100644
index 0000000..c74b437
--- /dev/null
+++ b/hypervideo_dl/extractor/vimple.py
@@ -0,0 +1,61 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class SprutoBaseIE(InfoExtractor):
+ def _extract_spruto(self, spruto, video_id):
+ playlist = spruto['playlist'][0]
+ title = playlist['title']
+ video_id = playlist.get('videoId') or video_id
+ thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl')
+ duration = int_or_none(playlist.get('duration'))
+
+ formats = [{
+ 'url': f['url'],
+ } for f in playlist['video']]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class VimpleIE(SprutoBaseIE):
+ IE_DESC = 'Vimple - one-click video hosting'
+ _VALID_URL = r'https?://(?:player\.vimple\.(?:ru|co)/iframe|vimple\.(?:ru|co))/(?P<id>[\da-f-]{32,36})'
+ _TESTS = [{
+ 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
+ 'md5': '2e750a330ed211d3fd41821c6ad9a279',
+ 'info_dict': {
+ 'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf',
+ 'ext': 'mp4',
+ 'title': 'Sunset',
+ 'duration': 20,
+ 'thumbnail': r're:https?://.*?\.jpg',
+ },
+ }, {
+ 'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://vimple.co/04506a053f124483b8fb05ed73899f19',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://player.vimple.ru/iframe/%s' % video_id, video_id)
+
+ spruto = self._parse_json(
+ self._search_regex(
+ r'sprutoData\s*:\s*({.+?}),\r\n', webpage, 'spruto data'),
+ video_id)
+
+ return self._extract_spruto(spruto, video_id)
diff --git a/hypervideo_dl/extractor/vine.py b/hypervideo_dl/extractor/vine.py
new file mode 100644
index 0000000..80b896b
--- /dev/null
+++ b/hypervideo_dl/extractor/vine.py
@@ -0,0 +1,154 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ unified_timestamp,
+)
+
+
+class VineIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vine\.co/(?:v|oembed)/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://vine.co/v/b9KOOWX7HUx',
+ 'md5': '2f36fed6235b16da96ce9b4dc890940d',
+ 'info_dict': {
+ 'id': 'b9KOOWX7HUx',
+ 'ext': 'mp4',
+ 'title': 'Chicken.',
+ 'alt_title': 'Vine by Jack',
+ 'timestamp': 1368997951,
+ 'upload_date': '20130519',
+ 'uploader': 'Jack',
+ 'uploader_id': '76',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ }, {
+ 'url': 'https://vine.co/v/e192BnZnZ9V',
+ 'info_dict': {
+ 'id': 'e192BnZnZ9V',
+ 'ext': 'mp4',
+ 'title': 'ยิ้ม~ เขิน~ อาย~ น่าร้ากอ้ะ >//< @n_whitewo @orlameena #lovesicktheseries #lovesickseason2',
+ 'alt_title': 'Vine by Pimry_zaa',
+ 'timestamp': 1436057405,
+ 'upload_date': '20150705',
+ 'uploader': 'Pimry_zaa',
+ 'uploader_id': '1135760698325307392',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://vine.co/v/MYxVapFvz2z',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vine.co/v/bxVjBbZlPUH',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data = self._download_json(
+ 'https://archive.vine.co/posts/%s.json' % video_id, video_id)
+
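+ # The archive JSON is inconsistent about key casing (e.g. videoLowUrl vs videoLowURL)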
+ def video_url(kind):
+ for url_suffix in ('Url', 'URL'):
+ format_url = data.get('video%s%s' % (kind, url_suffix))
+ if format_url:
+ return format_url
+
+ formats = []
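+ # The enumerate() index doubles as an ascending quality ranking: low < standard < dash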
+ for quality, format_id in enumerate(('low', '', 'dash')):
+ format_url = video_url(format_id.capitalize())
+ if not format_url:
+ continue
+ # DASH link returns plain mp4
+ if format_id == 'dash' and determine_ext(format_url) == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id or 'standard',
+ 'quality': quality,
+ })
+ self._sort_formats(formats)
+
+ username = data.get('username')
+
+ alt_title = 'Vine by %s' % username if username else None
+
+ return {
+ 'id': video_id,
+ 'title': data.get('description') or alt_title or 'Vine video',
+ 'alt_title': alt_title,
+ 'thumbnail': data.get('thumbnailUrl'),
+ 'timestamp': unified_timestamp(data.get('created')),
+ 'uploader': username,
+ 'uploader_id': data.get('userIdStr'),
+ 'view_count': int_or_none(data.get('loops')),
+ 'like_count': int_or_none(data.get('likes')),
+ 'comment_count': int_or_none(data.get('comments')),
+ 'repost_count': int_or_none(data.get('reposts')),
+ 'formats': formats,
+ }
+
+
+class VineUserIE(InfoExtractor):
+ IE_NAME = 'vine:user'
+ _VALID_URL = r'https?://vine\.co/(?P<u>u/)?(?P<user>[^/]+)'
+ _VINE_BASE_URL = 'https://vine.co/'
+ _TESTS = [{
+ 'url': 'https://vine.co/itsruthb',
+ 'info_dict': {
+ 'id': 'itsruthb',
+ 'title': 'Ruth B',
+ 'description': '| Instagram/Twitter: itsruthb | still a lost boy from neverland',
+ },
+ 'playlist_mincount': 611,
+ }, {
+ 'url': 'https://vine.co/u/942914934646415360',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if VineIE.suitable(url) else super(VineUserIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ user = mobj.group('user')
+ u = mobj.group('u')
+
+ profile_url = '%sapi/users/profiles/%s%s' % (
+ self._VINE_BASE_URL, 'vanity/' if not u else '', user)
+ profile_data = self._download_json(
+ profile_url, user, note='Downloading user profile data')
+
+ data = profile_data['data']
+ user_id = data.get('userId') or data['userIdStr']
+ profile = self._download_json(
+ 'https://archive.vine.co/profiles/%s.json' % user_id, user_id)
+ entries = [
+ self.url_result(
+ 'https://vine.co/v/%s' % post_id, ie='Vine', video_id=post_id)
+ for post_id in profile['posts']
+ if post_id and isinstance(post_id, compat_str)]
+ return self.playlist_result(
+ entries, user, profile.get('username'), profile.get('description'))
diff --git a/hypervideo_dl/extractor/viqeo.py b/hypervideo_dl/extractor/viqeo.py
new file mode 100644
index 0000000..be7dfa8
--- /dev/null
+++ b/hypervideo_dl/extractor/viqeo.py
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ str_or_none,
+ url_or_none,
+)
+
+
+class ViqeoIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ viqeo:|
+ https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=|
+ https?://api\.viqeo\.tv/v\d+/data/startup\?.*?\bvideo(?:%5B%5D|\[\])=
+ )
+ (?P<id>[\da-f]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
+ 'md5': 'a169dd1a6426b350dca4296226f21e76',
+ 'info_dict': {
+ 'id': 'cde96f09d25f39bee837',
+ 'ext': 'mp4',
+ 'title': 'cde96f09d25f39bee837',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 76,
+ },
+ }, {
+ 'url': 'viqeo:cde96f09d25f39bee837',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id)
+
+ data = self._parse_json(
+ self._search_regex(
+ r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'),
+ video_id)
+
+ formats = []
+ thumbnails = []
+ for media_file in data['mediaFiles']:
+ if not isinstance(media_file, dict):
+ continue
+ media_url = url_or_none(media_file.get('url'))
+ if not media_url or not media_url.startswith(('http', '//')):
+ continue
+ media_type = str_or_none(media_file.get('type'))
+ if not media_type:
+ continue
+ media_kind = media_type.split('/')[0].lower()
+ f = {
+ 'url': media_url,
+ 'width': int_or_none(media_file.get('width')),
+ 'height': int_or_none(media_file.get('height')),
+ }
+ format_id = str_or_none(media_file.get('quality'))
+ if media_kind == 'image':
+ f['id'] = format_id
+ thumbnails.append(f)
+ elif media_kind in ('video', 'audio'):
+ is_audio = media_kind == 'audio'
+ f.update({
+ 'format_id': 'audio' if is_audio else format_id,
+ 'fps': int_or_none(media_file.get('fps')),
+ 'vcodec': 'none' if is_audio else None,
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ duration = int_or_none(data.get('duration'))
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'duration': duration,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/viu.py b/hypervideo_dl/extractor/viu.py
new file mode 100644
index 0000000..3bd3752
--- /dev/null
+++ b/hypervideo_dl/extractor/viu.py
@@ -0,0 +1,272 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_kwargs,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class ViuBaseIE(InfoExtractor):
+ def _real_initialize(self):
+ viu_auth_res = self._request_webpage(
+ 'https://www.viu.com/api/apps/v2/authenticate', None,
+ 'Requesting Viu auth', query={
+ 'acct': 'test',
+ 'appid': 'viu_desktop',
+ 'fmt': 'json',
+ 'iid': 'guest',
+ 'languageid': 'default',
+ 'platform': 'desktop',
+ 'userid': 'guest',
+ 'useridtype': 'guest',
+ 'ver': '1.0'
+ }, headers=self.geo_verification_headers())
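+ # The auth token comes back in the X-VIU-AUTH response header, not in the body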
+ self._auth_token = viu_auth_res.info()['X-VIU-AUTH']
+
+ def _call_api(self, path, *args, **kwargs):
+ headers = self.geo_verification_headers()
+ headers.update({
+ 'X-VIU-AUTH': self._auth_token
+ })
+ headers.update(kwargs.get('headers', {}))
+ kwargs['headers'] = headers
+ response = self._download_json(
+ 'https://www.viu.com/api/' + path, *args,
+ **compat_kwargs(kwargs))['response']
+ if response.get('status') != 'success':
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, response['message']), expected=True)
+ return response
+
+
+class ViuIE(ViuBaseIE):
+ _VALID_URL = r'(?:viu:|https?://[^/]+\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059',
+ 'info_dict': {
+ 'id': '1116705532',
+ 'ext': 'mp4',
+ 'title': 'Citizen Khan - Ep 1',
+ 'description': 'md5:d7ea1604f49e5ba79c212c551ce2110e',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ 'skip': 'Geo-restricted to India',
+ }, {
+ 'url': 'https://www.viu.com/en/media/1130599965',
+ 'info_dict': {
+ 'id': '1130599965',
+ 'ext': 'mp4',
+ 'title': 'Jealousy Incarnate - Episode 1',
+ 'description': 'md5:d3d82375cab969415d2720b6894361e9',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ 'skip': 'Geo-restricted to Indonesia',
+ }, {
+ 'url': 'https://india.viu.com/en/media/1126286865',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video_data = self._call_api(
+ 'clip/load', video_id, 'Downloading video data', query={
+ 'appid': 'viu_desktop',
+ 'fmt': 'json',
+ 'id': video_id
+ })['item'][0]
+
+ title = video_data['title']
+
+ m3u8_url = None
+ url_path = video_data.get('urlpathd') or video_data.get('urlpath')
+ tdirforwhole = video_data.get('tdirforwhole')
+ # #EXT-X-BYTERANGE is not supported by the native HLS downloader
+ # or by ffmpeg (#10955)
+ # hls_file = video_data.get('hlsfile')
+ hls_file = video_data.get('jwhlsfile')
+ if url_path and tdirforwhole and hls_file:
+ m3u8_url = '%s/%s/%s' % (url_path, tdirforwhole, hls_file)
+ else:
+ # m3u8_url = re.sub(
+ # r'(/hlsc_)[a-z]+(\d+\.m3u8)',
+ # r'\1whe\2', video_data['href'])
+ m3u8_url = video_data['href']
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for key, value in video_data.items():
+ mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
+ if not mobj:
+ continue
+ subtitles.setdefault(mobj.group('lang'), []).append({
+ 'url': value,
+ 'ext': mobj.group('ext')
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'series': video_data.get('moviealbumshowname'),
+ 'episode': title,
+ 'episode_number': int_or_none(video_data.get('episodeno')),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class ViuPlaylistIE(ViuBaseIE):
+ IE_NAME = 'viu:playlist'
+ _VALID_URL = r'https?://www\.viu\.com/[^/]+/listing/playlist-(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.viu.com/en/listing/playlist-22461380',
+ 'info_dict': {
+ 'id': '22461380',
+ 'title': 'The Good Wife',
+ },
+ 'playlist_count': 16,
+ 'skip': 'Geo-restricted to Indonesia',
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ playlist_data = self._call_api(
+ 'container/load', playlist_id,
+ 'Downloading playlist info', query={
+ 'appid': 'viu_desktop',
+ 'fmt': 'json',
+ 'id': 'playlist-' + playlist_id
+ })['container']
+
+ entries = []
+ for item in playlist_data.get('item', []):
+ item_id = item.get('id')
+ if not item_id:
+ continue
+ item_id = compat_str(item_id)
+ entries.append(self.url_result(
+ 'viu:' + item_id, 'Viu', item_id))
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_data.get('title'))
+
+
+class ViuOTTIE(InfoExtractor):
+ IE_NAME = 'viu:ott'
+ _VALID_URL = r'https?://(?:www\.)?viu\.com/ott/(?P<country_code>[a-z]{2})/[a-z]{2}-[a-z]{2}/vod/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I',
+ 'info_dict': {
+ 'id': '3421',
+ 'ext': 'mp4',
+ 'title': 'A New Beginning',
+ 'description': 'md5:1e7486a619b6399b25ba6a41c0fe5b2c',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ 'skip': 'Geo-restricted to Singapore',
+ }, {
+ 'url': 'http://www.viu.com/ott/hk/zh-hk/vod/7123/%E5%A4%A7%E4%BA%BA%E5%A5%B3%E5%AD%90',
+ 'info_dict': {
+ 'id': '7123',
+ 'ext': 'mp4',
+ 'title': '這就是我的生活之道',
+ 'description': 'md5:4eb0d8b08cf04fcdc6bbbeb16043434f',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ 'skip': 'Geo-restricted to Hong Kong',
+ }]
+
+ _AREA_ID = {
+ 'HK': 1,
+ 'SG': 2,
+ 'TH': 4,
+ 'PH': 5,
+ }
+
+ def _real_extract(self, url):
+ country_code, video_id = re.match(self._VALID_URL, url).groups()
+
+ query = {
+ 'r': 'vod/ajax-detail',
+ 'platform_flag_label': 'web',
+ 'product_id': video_id,
+ }
+
+ area_id = self._AREA_ID.get(country_code.upper())
+ if area_id:
+ query['area_id'] = area_id
+
+ product_data = self._download_json(
+ 'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
+ 'Downloading video info', query=query)['data']
+
+ video_data = product_data.get('current_product')
+ if not video_data:
+ raise ExtractorError('This video is not available in your region.', expected=True)
+
+ stream_data = self._download_json(
+ 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
+ video_id, 'Downloading stream info', query={
+ 'ccs_product_id': video_data['ccs_product_id'],
+ }, headers={
+ 'Referer': url,
+ 'Origin': re.search(r'https?://[^/]+', url).group(0),
+ })['data']['stream']
+
+ stream_sizes = stream_data.get('size', {})
+ formats = []
+ for vid_format, stream_url in stream_data.get('url', {}).items():
+ height = int_or_none(self._search_regex(
+ r's(\d+)p', vid_format, 'height', default=None))
+ formats.append({
+ 'format_id': vid_format,
+ 'url': stream_url,
+ 'height': height,
+ 'ext': 'mp4',
+ 'filesize': int_or_none(stream_sizes.get(vid_format))
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for sub in video_data.get('subtitle', []):
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ subtitles.setdefault(sub.get('name'), []).append({
+ 'url': sub_url,
+ 'ext': 'srt',
+ })
+
+ title = video_data['synopsis'].strip()
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'series': product_data.get('series', {}).get('name'),
+ 'episode': title,
+ 'episode_number': int_or_none(video_data.get('number')),
+ 'duration': int_or_none(stream_data.get('duration')),
+ 'thumbnail': video_data.get('cover_image_url'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/vk.py b/hypervideo_dl/extractor/vk.py
new file mode 100644
index 0000000..6b3513e
--- /dev/null
+++ b/hypervideo_dl/extractor/vk.py
@@ -0,0 +1,689 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import collections
+import functools
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ get_element_by_class,
+ int_or_none,
+ OnDemandPagedList,
+ orderedSet,
+ str_or_none,
+ str_to_int,
+ unescapeHTML,
+ unified_timestamp,
+ url_or_none,
+ urlencode_postdata,
+)
+from .dailymotion import DailymotionIE
+from .odnoklassniki import OdnoklassnikiIE
+from .pladform import PladformIE
+from .vimeo import VimeoIE
+from .youtube import YoutubeIE
+
+
+class VKBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'vk'
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page, url_handle = self._download_webpage_handle(
+ 'https://vk.com', None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'email': username.encode('cp1251'),
+ 'pass': password.encode('cp1251'),
+ })
+
+ # vk serves two identical remixlhk cookies in the Set-Cookie header and
+ # expects the first one to actually be set
+ self._apply_first_set_cookie_header(url_handle, 'remixlhk')
+
+ login_page = self._download_webpage(
+ 'https://login.vk.com/?act=login', None,
+ note='Logging in',
+ data=urlencode_postdata(login_form))
+
+ if re.search(r'onLoginFailed', login_page):
+ raise ExtractorError(
+ 'Unable to login, incorrect username and/or password', expected=True)
+
+ def _real_initialize(self):
+ self._login()
+
+ def _download_payload(self, path, video_id, data, fatal=True):
+ data['al'] = 1
+ code, payload = self._download_json(
+ 'https://vk.com/%s.php' % path, video_id,
+ data=urlencode_postdata(data), fatal=fatal,
+ headers={'X-Requested-With': 'XMLHttpRequest'})['payload']
+ if code == '3':
+ self.raise_login_required()
+ elif code == '8':
+ raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
+ return payload
+
+
+class VKIE(VKBaseIE):
+ IE_NAME = 'vk'
+ IE_DESC = 'VK'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:
+ (?:(?:m|new)\.)?vk\.com/video_|
+ (?:www\.)?daxab\.com/
+ )
+ ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
+ (?:
+ (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?video|
+ (?:www\.)?daxab\.com/embed/
+ )
+ (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))?
+ )
+ '''
+ _TESTS = [
+ {
+ 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
+ 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
+ 'info_dict': {
+ 'id': '-77521_162222515',
+ 'ext': 'mp4',
+ 'title': 'ProtivoGunz - Хуёвая песня',
+ 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
+ 'uploader_id': '-77521',
+ 'duration': 195,
+ 'timestamp': 1329049880,
+ 'upload_date': '20120212',
+ },
+ },
+ {
+ 'url': 'http://vk.com/video205387401_165548505',
+ 'info_dict': {
+ 'id': '205387401_165548505',
+ 'ext': 'mp4',
+ 'title': 'No name',
+ 'uploader': 'Tom Cruise',
+ 'uploader_id': '205387401',
+ 'duration': 9,
+ 'timestamp': 1374364108,
+ 'upload_date': '20130720',
+ }
+ },
+ {
+ 'note': 'Embedded video',
+ 'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
+ 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
+ 'info_dict': {
+ 'id': '-77521_162222515',
+ 'ext': 'mp4',
+ 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
+ 'title': 'ProtivoGunz - Хуёвая песня',
+ 'duration': 195,
+ 'upload_date': '20120212',
+ 'timestamp': 1329049880,
+ 'uploader_id': '-77521',
+ },
+ },
+ {
+ # VIDEO NOW REMOVED
+ # please update if you find a video whose URL follows the same pattern
+ 'url': 'http://vk.com/video-8871596_164049491',
+ 'md5': 'a590bcaf3d543576c9bd162812387666',
+ 'note': 'Only available for registered users',
+ 'info_dict': {
+ 'id': '-8871596_164049491',
+ 'ext': 'mp4',
+ 'uploader': 'Триллеры',
+ 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
+ 'duration': 8352,
+ 'upload_date': '20121218',
+ 'view_count': int,
+ },
+ 'skip': 'Removed',
+ },
+ {
+ 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
+ 'info_dict': {
+ 'id': '-43215063_168067957',
+ 'ext': 'mp4',
+ 'uploader': 'Bro Mazter',
+ 'title': ' ',
+ 'duration': 7291,
+ 'upload_date': '20140328',
+ 'uploader_id': '223413403',
+ 'timestamp': 1396018030,
+ },
+ 'skip': 'Requires vk account credentials',
+ },
+ {
+ 'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
+ 'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
+ 'note': 'ivi.ru embed',
+ 'info_dict': {
+ 'id': '-43215063_169084319',
+ 'ext': 'mp4',
+ 'title': 'Книга Илая',
+ 'duration': 6771,
+ 'upload_date': '20140626',
+ 'view_count': int,
+ },
+ 'skip': 'Removed',
+ },
+ {
+ # video (removed?) only available with list id
+ 'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
+ 'md5': '091287af5402239a1051c37ec7b92913',
+ 'info_dict': {
+ 'id': '30481095_171201961',
+ 'ext': 'mp4',
+ 'title': 'ТюменцевВВ_09.07.2015',
+ 'uploader': 'Anton Ivanov',
+ 'duration': 109,
+ 'upload_date': '20150709',
+ 'view_count': int,
+ },
+ 'skip': 'Removed',
+ },
+ {
+ # youtube embed
+ 'url': 'https://vk.com/video276849682_170681728',
+ 'info_dict': {
+ 'id': 'V3K4mi0SYkc',
+ 'ext': 'mp4',
+ 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
+ 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
+ 'duration': 178,
+ 'upload_date': '20130116',
+ 'uploader': "Children's Joy Foundation Inc.",
+ 'uploader_id': 'thecjf',
+ 'view_count': int,
+ },
+ },
+ {
+ # dailymotion embed
+ 'url': 'https://vk.com/video-37468416_456239855',
+ 'info_dict': {
+ 'id': 'k3lz2cmXyRuJQSjGHUv',
+ 'ext': 'mp4',
+ 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
+ 'description': 'md5:424b8e88cc873217f520e582ba28bb36',
+ 'uploader': 'AniLibria.Tv',
+ 'upload_date': '20160914',
+ 'uploader_id': 'x1p5vl5',
+ 'timestamp': 1473877246,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # video key is extra_data not url\d+
+ 'url': 'http://vk.com/video-110305615_171782105',
+ 'md5': 'e13fcda136f99764872e739d13fac1d1',
+ 'info_dict': {
+ 'id': '-110305615_171782105',
+ 'ext': 'mp4',
+ 'title': 'S-Dance, репетиции к The way show',
+ 'uploader': 'THE WAY SHOW | 17 апреля',
+ 'uploader_id': '-110305615',
+ 'timestamp': 1454859345,
+ 'upload_date': '20160207',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # finished live stream, postlive_mp4
+ 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
+ 'info_dict': {
+ 'id': '-387766_456242764',
+ 'ext': 'mp4',
+ 'title': 'ИгроМир 2016 День 1 — Игромания Утром',
+ 'uploader': 'Игромания',
+ 'duration': 5239,
+ # TODO: use act=show to extract view_count
+ # 'view_count': int,
+ 'upload_date': '20160929',
+ 'uploader_id': '-387766',
+ 'timestamp': 1475137527,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # live stream with hls and rtmp links; the stream has most likely
+ # already finished by the time you are reading this comment
+ 'url': 'https://vk.com/video-140332_456239111',
+ 'only_matching': True,
+ },
+ {
+ # removed video, just testing that we match the pattern
+ 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
+ 'only_matching': True,
+ },
+ {
+ # age restricted video, requires vk account credentials
+ 'url': 'https://vk.com/video205387401_164765225',
+ 'only_matching': True,
+ },
+ {
+ # pladform embed
+ 'url': 'https://vk.com/video-76116461_171554880',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://new.vk.com/video205387401_165548505',
+ 'only_matching': True,
+ },
+ {
+ # This video is no longer available, because its author has been blocked.
+ 'url': 'https://vk.com/video-10639516_456240611',
+ 'only_matching': True,
+ },
+ {
+ # The video is not available in your region.
+ 'url': 'https://vk.com/video-51812607_171445436',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_sibnet_urls(webpage):
+ # https://help.sibnet.ru/?sibnet_video_embed
+ return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('videoid')
+
+ mv_data = {}
+ if video_id:
+ data = {
+ 'act': 'show_inline',
+ 'video': video_id,
+ }
+ # Some videos (removed?) can only be downloaded with list id specified
+ list_id = mobj.group('list_id')
+ if list_id:
+ data['list'] = list_id
+
+ payload = self._download_payload('al_video', video_id, data)
+ info_page = payload[1]
+ opts = payload[-1]
+ mv_data = opts.get('mvData') or {}
+ player = opts.get('player') or {}
+ else:
+ video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
+
+ info_page = self._download_webpage(
+ 'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
+
+ error_message = self._html_search_regex(
+ [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
+ r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
+ info_page, 'error message', default=None)
+ if error_message:
+ raise ExtractorError(error_message, expected=True)
+
+ if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
+ raise ExtractorError(
+ 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
+ expected=True)
+
+ ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'
+
+ ERRORS = {
+ r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
+ ERROR_COPYRIGHT,
+
+ r'>The video .*? was removed from public access by request of the copyright holder.<':
+ ERROR_COPYRIGHT,
+
+ r'<!>Please log in or <':
+ 'Video %s is only available for registered users, '
+ 'use --username and --password options to provide account credentials.',
+
+ r'<!>Unknown error':
+ 'Video %s does not exist.',
+
+ r'<!>Видео временно недоступно':
+ 'Video %s is temporarily unavailable.',
+
+ r'<!>Access denied':
+ 'Access denied to video %s.',
+
+ r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
+ 'Video %s is no longer available, because its author has been blocked.',
+
+ r'<!>This video is no longer available, because its author has been blocked.':
+ 'Video %s is no longer available, because its author has been blocked.',
+
+ r'<!>This video is no longer available, because it has been deleted.':
+ 'Video %s is no longer available, because it has been deleted.',
+
+ r'<!>The video .+? is not available in your region.':
+ 'Video %s is not available in your region.',
+ }
+
+ for error_re, error_msg in ERRORS.items():
+ if re.search(error_re, info_page):
+ raise ExtractorError(error_msg % video_id, expected=True)
+
+ player = self._parse_json(self._search_regex(
+ r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
+ info_page, 'player params'), video_id)
+
+ youtube_url = YoutubeIE._extract_url(info_page)
+ if youtube_url:
+ return self.url_result(youtube_url, YoutubeIE.ie_key())
+
+ vimeo_url = VimeoIE._extract_url(url, info_page)
+ if vimeo_url is not None:
+ return self.url_result(vimeo_url, VimeoIE.ie_key())
+
+ pladform_url = PladformIE._extract_url(info_page)
+ if pladform_url:
+ return self.url_result(pladform_url, PladformIE.ie_key())
+
+ m_rutube = re.search(
+ r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
+ if m_rutube is not None:
+ rutube_url = self._proto_relative_url(
+ m_rutube.group(1).replace('\\', ''))
+ return self.url_result(rutube_url)
+
+ dailymotion_urls = DailymotionIE._extract_urls(info_page)
+ if dailymotion_urls:
+ return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())
+
+ odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
+ if odnoklassniki_url:
+ return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
+
+ sibnet_urls = self._extract_sibnet_urls(info_page)
+ if sibnet_urls:
+ return self.url_result(sibnet_urls[0])
+
+ m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
+ if m_opts:
+ m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
+ if m_opts_url:
+ opts_url = m_opts_url.group(1)
+ if opts_url.startswith('//'):
+ opts_url = 'http:' + opts_url
+ return self.url_result(opts_url)
+
+ data = player['params'][0]
+ title = unescapeHTML(data['md_title'])
+
+ # 2 = live
+ # 3 = post live (finished live)
+ is_live = data.get('live') == 2
+ if is_live:
+ title = self._live_title(title)
+
+ timestamp = unified_timestamp(self._html_search_regex(
+ r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
+ 'upload date', default=None)) or int_or_none(data.get('date'))
+
+ view_count = str_to_int(self._search_regex(
+ r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
+ info_page, 'view count', default=None))
+
+ formats = []
+ for format_id, format_url in data.items():
+ format_url = url_or_none(format_url)
+ if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
+ continue
+ if (format_id.startswith(('url', 'cache'))
+ or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
+ height = int_or_none(self._search_regex(
+ r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'height': height,
+ })
+ elif format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False, live=is_live))
+ elif format_id == 'rtmp':
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'ext': 'flv',
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnail': data.get('jpg'),
+ 'uploader': data.get('md_author'),
+ 'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
+ 'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'like_count': int_or_none(mv_data.get('likes')),
+ 'comment_count': int_or_none(mv_data.get('commcount')),
+ 'is_live': is_live,
+ }
+
+
+class VKUserVideosIE(VKBaseIE):
+ IE_NAME = 'vk:uservideos'
+ IE_DESC = "VK - User's Videos"
+ _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
+ _TEMPLATE_URL = 'https://vk.com/videos'
+ _TESTS = [{
+ 'url': 'https://vk.com/videos-767561',
+ 'info_dict': {
+ 'id': '-767561_all',
+ },
+ 'playlist_mincount': 1150,
+ }, {
+ 'url': 'https://vk.com/videos-767561?section=uploaded',
+ 'info_dict': {
+ 'id': '-767561_uploaded',
+ },
+ 'playlist_mincount': 425,
+ }, {
+ 'url': 'http://vk.com/videos205387401',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://vk.com/videos-77521',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://vk.com/videos-97664626?section=all',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.vk.com/videos205387401',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://new.vk.com/videos205387401',
+ 'only_matching': True,
+ }]
+ _PAGE_SIZE = 1000
+ _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
+
+ def _fetch_page(self, page_id, section, page):
+ videos = self._download_payload('al_video', page_id, {
+ 'act': 'load_videos_silent',
+ 'offset': page * self._PAGE_SIZE,
+ 'oid': page_id,
+ 'section': section,
+ })[0][section]['list']
+
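+ # Each entry in the list is an array whose first two items are owner_id and id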
+ for video in videos:
+ v = self._VIDEO._make(video[:2])
+ video_id = '%d_%d' % (v.owner_id, v.id)
+ yield self.url_result(
+ 'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
+
+ def _real_extract(self, url):
+ page_id, section = re.match(self._VALID_URL, url).groups()
+ if not section:
+ section = 'all'
+
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, page_id, section),
+ self._PAGE_SIZE)
+
+ return self.playlist_result(entries, '%s_%s' % (page_id, section))
+
+
+class VKWallPostIE(VKBaseIE):
+ IE_NAME = 'vk:wallpost'
+ _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
+ _TESTS = [{
+ # public page URL, audio playlist
+ 'url': 'https://vk.com/bs.official?w=wall-23538238_35',
+ 'info_dict': {
+ 'id': '-23538238_35',
+ 'title': 'Black Shadow - Wall post -23538238_35',
+ 'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
+ },
+ 'playlist': [{
+ 'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
+ 'info_dict': {
+ 'id': '135220665_111806521',
+ 'ext': 'mp4',
+ 'title': 'Black Shadow - Слепое Верование',
+ 'duration': 370,
+ 'uploader': 'Black Shadow',
+ 'artist': 'Black Shadow',
+ 'track': 'Слепое Верование',
+ },
+ }, {
+ 'md5': '4cc7e804579122b17ea95af7834c9233',
+ 'info_dict': {
+ 'id': '135220665_111802303',
+ 'ext': 'mp4',
+ 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
+ 'duration': 423,
+ 'uploader': 'Black Shadow',
+ 'artist': 'Black Shadow',
+ 'track': 'Война - Негасимое Бездны Пламя!',
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ 'usenetrc': True,
+ },
+ 'skip': 'Requires vk account credentials',
+ }, {
+ # single YouTube embed, no leading -
+ 'url': 'https://vk.com/wall85155021_6319',
+ 'info_dict': {
+ 'id': '85155021_6319',
+ 'title': 'Сергей Горбунов - Wall post 85155021_6319',
+ },
+ 'playlist_count': 1,
+ 'params': {
+ 'usenetrc': True,
+ },
+ 'skip': 'Requires vk account credentials',
+ }, {
+ # wall page URL
+ 'url': 'https://vk.com/wall-23538238_35',
+ 'only_matching': True,
+ }, {
+ # mobile wall page URL
+ 'url': 'https://m.vk.com/wall-23538238_35',
+ 'only_matching': True,
+ }]
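+ # Custom base64 alphabet used by VK: lowercase letters first, with '0' and 'O' swapped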
+ _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
+ _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])
+
+ def _decode(self, enc):
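+ # Plain base64 decoding against the custom alphabet above: accumulate 6-bit
+ # values and emit one byte at positions 1-3 of every 4-character group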
+ dec = ''
+ e = n = 0
+ for c in enc:
+ r = self._BASE64_CHARS.index(c)
+ cond = n % 4
+ e = 64 * e + r if cond else r
+ n += 1
+ if cond:
+ dec += chr(255 & e >> (-2 * n & 6))
+ return dec
+
+ def _unmask_url(self, mask_url, vk_id):
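+ # The real audio URL is hidden in the 'extra' query parameter as two
+ # '#'-separated blobs: the second yields a numeric key that, XORed with
+ # vk_id, seeds the index permutation that unscrambles the first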
+ if 'audio_api_unavailable' in mask_url:
+ extra = mask_url.split('?extra=')[1].split('#')
+ func, base = self._decode(extra[1]).split(chr(11))
+ mask_url = list(self._decode(extra[0]))
+ url_len = len(mask_url)
+ indexes = [None] * url_len
+ index = int(base) ^ vk_id
+ for n in range(url_len - 1, -1, -1):
+ index = (url_len * (n + 1) ^ index + n) % url_len
+ indexes[n] = index
+ for n in range(1, url_len):
+ c = mask_url[n]
+ index = indexes[url_len - 1 - n]
+ mask_url[n] = mask_url[index]
+ mask_url[index] = c
+ mask_url = ''.join(mask_url)
+ return mask_url
+
+ def _real_extract(self, url):
+ post_id = self._match_id(url)
+
+ webpage = self._download_payload('wkview', post_id, {
+ 'act': 'show',
+ 'w': 'wall' + post_id,
+ })[1]
+
+ description = clean_html(get_element_by_class('wall_post_text', webpage))
+ uploader = clean_html(get_element_by_class('author', webpage))
+
+ entries = []
+
+ for audio in re.findall(r'data-audio="([^"]+)', webpage):
+ audio = self._parse_json(unescapeHTML(audio), post_id)
+ a = self._AUDIO._make(audio[:16])
+ if not a.url:
+ continue
+ title = unescapeHTML(a.title)
+ performer = unescapeHTML(a.performer)
+ entries.append({
+ 'id': '%s_%s' % (a.owner_id, a.id),
+ 'url': self._unmask_url(a.url, a.ads['vk_id']),
+ 'title': '%s - %s' % (performer, title) if performer else title,
+ 'thumbnails': [{'url': c_url} for c_url in a.cover_url.split(',')] if a.cover_url else None,
+ 'duration': int_or_none(a.duration),
+ 'uploader': uploader,
+ 'artist': performer,
+ 'track': title,
+ 'ext': 'mp4',
+ 'protocol': 'm3u8',
+ })
+
+ for video in re.finditer(
+ r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):
+ entries.append(self.url_result(
+ compat_urlparse.urljoin(url, video.group('url')), VKIE.ie_key()))
+
+ title = 'Wall post %s' % post_id
+
+ return self.playlist_result(
+ orderedSet(entries), post_id,
+ '%s - %s' % (uploader, title) if uploader else title,
+ description)
diff --git a/hypervideo_dl/extractor/vlive.py b/hypervideo_dl/extractor/vlive.py
new file mode 100644
index 0000000..42da34d
--- /dev/null
+++ b/hypervideo_dl/extractor/vlive.py
@@ -0,0 +1,328 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import json
+
+from .naver import NaverBaseIE
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+ str_or_none,
+ strip_or_none,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class VLiveBaseIE(NaverBaseIE):
+ _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
+
+
+class VLiveIE(VLiveBaseIE):
+ IE_NAME = 'vlive'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
+ _NETRC_MACHINE = 'vlive'
+ _TESTS = [{
+ 'url': 'http://www.vlive.tv/video/1326',
+ 'md5': 'cc7314812855ce56de70a06a27314983',
+ 'info_dict': {
+ 'id': '1326',
+ 'ext': 'mp4',
+ 'title': "Girl's Day's Broadcast",
+ 'creator': "Girl's Day",
+ 'view_count': int,
+ 'uploader_id': 'muploader_a',
+ },
+ }, {
+ 'url': 'http://www.vlive.tv/video/16937',
+ 'info_dict': {
+ 'id': '16937',
+ 'ext': 'mp4',
+ 'title': '첸백시 걍방',
+ 'creator': 'EXO',
+ 'view_count': int,
+ 'subtitles': 'mincount:12',
+ 'uploader_id': 'muploader_j',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.vlive.tv/video/129100',
+ 'md5': 'ca2569453b79d66e5b919e5d308bff6b',
+ 'info_dict': {
+ 'id': '129100',
+ 'ext': 'mp4',
+ 'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
+ 'creator': 'BTS+',
+ 'view_count': int,
+ 'subtitles': 'mincount:10',
+ },
+ 'skip': 'This video is only available for CH+ subscribers',
+ }, {
+ 'url': 'https://www.vlive.tv/embed/1326',
+ 'only_matching': True,
+ }, {
+ # works only with gcc=KR
+ 'url': 'https://www.vlive.tv/video/225019',
+ 'only_matching': True,
+ }]
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ email, password = self._get_login_info()
+ if None in (email, password):
+ return
+
+ def is_logged_in():
+ login_info = self._download_json(
+ 'https://www.vlive.tv/auth/loginInfo', None,
+ note='Downloading login info',
+ headers={'Referer': 'https://www.vlive.tv/home'})
+ return try_get(
+ login_info, lambda x: x['message']['login'], bool) or False
+
+ LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
+ self._request_webpage(
+ LOGIN_URL, None, note='Downloading login cookies')
+
+ self._download_webpage(
+ LOGIN_URL, None, note='Logging in',
+ data=urlencode_postdata({'email': email, 'pwd': password}),
+ headers={
+ 'Referer': LOGIN_URL,
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+
+ if not is_logged_in():
+ raise ExtractorError('Unable to log in', expected=True)
+
+ def _call_api(self, path_template, video_id, fields=None):
+ query = {'appId': self._APP_ID, 'gcc': 'KR', 'platformType': 'PC'}
+ if fields:
+ query['fields'] = fields
+ try:
+ return self._download_json(
+ 'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
+ 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
+ headers={'Referer': 'https://www.vlive.tv/'}, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
+ raise
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ post = self._call_api(
+ 'post/v1.0/officialVideoPost-%s', video_id,
+ 'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
+
+ video = post['officialVideo']
+
+ def get_common_fields():
+ channel = post.get('channel') or {}
+ return {
+ 'title': video.get('title'),
+ 'creator': post.get('author', {}).get('nickname'),
+ 'channel': channel.get('channelName'),
+ 'channel_id': channel.get('channelCode'),
+ 'duration': int_or_none(video.get('playTime')),
+ 'view_count': int_or_none(video.get('playCount')),
+ 'like_count': int_or_none(video.get('likeCount')),
+ 'comment_count': int_or_none(video.get('commentCount')),
+ }
+
+ video_type = video.get('type')
+ if video_type == 'VOD':
+ inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
+ vod_id = video['vodId']
+ return merge_dicts(
+ get_common_fields(),
+ self._extract_video_info(video_id, vod_id, inkey))
+ elif video_type == 'LIVE':
+ status = video.get('status')
+ if status == 'ON_AIR':
+ stream_url = self._call_api(
+ 'old/v3/live/%s/playInfo',
+ video_id)['result']['adaptiveStreamUrl']
+ formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
+ self._sort_formats(formats)
+ info = get_common_fields()
+ info.update({
+ 'title': self._live_title(video['title']),
+ 'id': video_id,
+ 'formats': formats,
+ 'is_live': True,
+ })
+ return info
+ elif status == 'ENDED':
+ raise ExtractorError(
+ 'Uploading for replay. Please wait...', expected=True)
+ elif status == 'RESERVED':
+ raise ExtractorError('Coming soon!', expected=True)
+ elif video.get('exposeStatus') == 'CANCEL':
+ raise ExtractorError(
+ 'We are sorry, but the live broadcast has been canceled.',
+ expected=True)
+ else:
+ raise ExtractorError('Unknown status %s' % status)
+
+
+class VLivePostIE(VLiveIE):
+ IE_NAME = 'vlive:post'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
+ _TESTS = [{
+ # uploadType = SOS
+ 'url': 'https://www.vlive.tv/post/1-20088044',
+ 'info_dict': {
+ 'id': '1-20088044',
+ 'title': 'Hola estrellitas la tierra les dice hola (si era así no?) Ha...',
+ 'description': 'md5:fab8a1e50e6e51608907f46c7fa4b407',
+ },
+ 'playlist_count': 3,
+ }, {
+ # uploadType = V
+ 'url': 'https://www.vlive.tv/post/1-20087926',
+ 'info_dict': {
+ 'id': '1-20087926',
+ 'title': 'James Corden: And so, the baby becamos the Papa💜😭💪😭',
+ },
+ 'playlist_count': 1,
+ }]
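+ # '%%s' survives the substitutions below, leaving a '%s' placeholder that
+ # _call_api later fills with the video id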
+ _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
+ _SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
+ _INKEY_TMPL = _FVIDEO_TMPL % 'inKey'
+
+ def _real_extract(self, url):
+ post_id = self._match_id(url)
+
+ post = self._call_api(
+ 'post/v1.0/post-%s', post_id,
+ 'attachments{video},officialVideo{videoSeq},plainBody,title')
+
+ video_seq = str_or_none(try_get(
+ post, lambda x: x['officialVideo']['videoSeq']))
+ if video_seq:
+ return self.url_result(
+ 'http://www.vlive.tv/video/' + video_seq,
+ VLiveIE.ie_key(), video_seq)
+
+ title = post['title']
+ entries = []
+ for idx, video in enumerate(post['attachments']['video'].values()):
+ video_id = video.get('videoId')
+ if not video_id:
+ continue
+ upload_type = video.get('uploadType')
+ upload_info = video.get('uploadInfo') or {}
+ entry = None
+ if upload_type == 'SOS':
+ download = self._call_api(
+ self._SOS_TMPL, video_id)['videoUrl']['download']
+ formats = []
+ for f_id, f_url in download.items():
+ formats.append({
+ 'format_id': f_id,
+ 'url': f_url,
+ 'height': int_or_none(f_id[:-1]),
+ })
+ self._sort_formats(formats)
+ entry = {
+ 'formats': formats,
+ 'id': video_id,
+ 'thumbnail': upload_info.get('imageUrl'),
+ }
+ elif upload_type == 'V':
+ vod_id = upload_info.get('videoId')
+ if not vod_id:
+ continue
+ inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey']
+ entry = self._extract_video_info(video_id, vod_id, inkey)
+ if entry:
+ entry['title'] = '%s_part%s' % (title, idx)
+ entries.append(entry)
+ return self.playlist_result(
+ entries, post_id, title, strip_or_none(post.get('plainBody')))
+
+
+class VLiveChannelIE(VLiveBaseIE):
+ IE_NAME = 'vlive:channel'
+ _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
+ _TESTS = [{
+ 'url': 'http://channels.vlive.tv/FCD4B',
+ 'info_dict': {
+ 'id': 'FCD4B',
+ 'title': 'MAMAMOO',
+ },
+ 'playlist_mincount': 110
+ }, {
+ 'url': 'https://www.vlive.tv/channel/FCD4B',
+ 'only_matching': True,
+ }]
+
+ def _call_api(self, path, channel_key_suffix, channel_value, note, query):
+ q = {
+ 'app_id': self._APP_ID,
+ 'channel' + channel_key_suffix: channel_value,
+ }
+ q.update(query)
+ return self._download_json(
+ 'http://api.vfan.vlive.tv/vproxy/channelplus/' + path,
+ channel_value, note='Downloading ' + note, query=q)['result']
+
+ def _real_extract(self, url):
+ channel_code = self._match_id(url)
+
+ channel_seq = self._call_api(
+ 'decodeChannelCode', 'Code', channel_code,
+ 'decode channel code', {})['channelSeq']
+
+ channel_name = None
+ entries = []
+
+ for page_num in itertools.count(1):
+ video_list = self._call_api(
+ 'getChannelVideoList', 'Seq', channel_seq,
+ 'channel list page #%d' % page_num, {
+ # Large values of maxNumOfRows (~300 or above) may cause
+ # empty responses (see [1]); e.g. this happens for [2], which
+ # has more than 300 videos.
+ # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
+ # 2. http://channels.vlive.tv/EDBF.
+ 'maxNumOfRows': 100,
+ 'pageNo': page_num
+ }
+ )
+
+ if not channel_name:
+ channel_name = try_get(
+ video_list,
+ lambda x: x['channelInfo']['channelName'],
+ compat_str)
+
+ videos = try_get(
+ video_list, lambda x: x['videoList'], list)
+ if not videos:
+ break
+
+ for video in videos:
+ video_id = video.get('videoSeq')
+ if not video_id:
+ continue
+ video_id = compat_str(video_id)
+ entries.append(
+ self.url_result(
+ 'http://www.vlive.tv/video/%s' % video_id,
+ ie=VLiveIE.ie_key(), video_id=video_id))
+
+ return self.playlist_result(
+ entries, channel_code, channel_name)
diff --git a/hypervideo_dl/extractor/vodlocker.py b/hypervideo_dl/extractor/vodlocker.py
new file mode 100644
index 0000000..02c9617
--- /dev/null
+++ b/hypervideo_dl/extractor/vodlocker.py
@@ -0,0 +1,80 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ NO_DEFAULT,
+ sanitized_Request,
+ urlencode_postdata,
+)
+
+
+class VodlockerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vodlocker\.(?:com|city)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
+
+ _TESTS = [{
+ 'url': 'http://vodlocker.com/e8wvyzz4sl42',
+ 'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf',
+ 'info_dict': {
+ 'id': 'e8wvyzz4sl42',
+ 'ext': 'mp4',
+ 'title': 'Germany vs Brazil',
+ 'thumbnail': r're:http://.*\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ if any(p in webpage for p in (
+ '>THIS FILE WAS DELETED<',
+ '>File Not Found<',
+ 'The file you were looking for could not be found, sorry for any inconvenience.<',
+ '>The file was removed')):
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ fields = self._hidden_inputs(webpage)
+
+ if fields.get('op') == 'download1':
+ self._sleep(3, video_id) # they do detect when requests happen too fast!
+ post = urlencode_postdata(fields)
+ req = sanitized_Request(url, post)
+ req.add_header('Content-type', 'application/x-www-form-urlencoded')
+ webpage = self._download_webpage(
+ req, video_id, 'Downloading video page')
+
+ def extract_file_url(html, default=NO_DEFAULT):
+ return self._search_regex(
+ r'file:\s*"(http[^\"]+)",', html, 'file url', default=default)
+
+ video_url = extract_file_url(webpage, default=None)
+
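+ # Some pages expose the file URL only inside their own embed page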
+ if not video_url:
+ embed_url = self._search_regex(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?vodlocker\.(?:com|city)/embed-.+?)\1',
+ webpage, 'embed url', group='url')
+ embed_webpage = self._download_webpage(
+ embed_url, video_id, 'Downloading embed webpage')
+ video_url = extract_file_url(embed_webpage)
+ thumbnail_webpage = embed_webpage
+ else:
+ thumbnail_webpage = webpage
+
+ title = self._search_regex(
+ r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')
+ thumbnail = self._search_regex(
+ r'image:\s*"(http[^\"]+)",', thumbnail_webpage, 'thumbnail', fatal=False)
+
+ formats = [{
+ 'format_id': 'sd',
+ 'url': video_url,
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/vodpl.py b/hypervideo_dl/extractor/vodpl.py
new file mode 100644
index 0000000..9e91970
--- /dev/null
+++ b/hypervideo_dl/extractor/vodpl.py
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .onet import OnetBaseIE
+
+
+class VODPlIE(OnetBaseIE):
+ _VALID_URL = r'https?://vod\.pl/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'
+
+ _TESTS = [{
+ 'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns',
+ 'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74',
+ 'info_dict': {
+ 'id': '3ep3jns',
+ 'ext': 'mp4',
+ 'title': 'Chłopaki nie płaczą',
+ 'description': 'md5:f5f03b84712e55f5ac9f0a3f94445224',
+ 'timestamp': 1463415154,
+ 'duration': 5765,
+ 'upload_date': '20160516',
+ },
+ }, {
+ 'url': 'https://vod.pl/seriale/belfer-na-planie-praca-kamery-online/2c10heh',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ info_dict = self._extract_from_id(self._search_mvp_id(webpage), webpage)
+ info_dict['id'] = video_id
+ return info_dict
diff --git a/hypervideo_dl/extractor/vodplatform.py b/hypervideo_dl/extractor/vodplatform.py
new file mode 100644
index 0000000..74d2257
--- /dev/null
+++ b/hypervideo_dl/extractor/vodplatform.py
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import unescapeHTML
+
+
+class VODPlatformIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar
+ 'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw',
+ 'md5': '1db2b7249ce383d6be96499006e951fc',
+ 'info_dict': {
+ 'id': 'RufMcytHDolTH1MuKHY9Fw',
+ 'ext': 'mp4',
+ 'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"',
+ }
+ }, {
+ 'url': 'http://embed.kwikmotion.com/embed/RufMcytHDolTH1MuKHY9Fw',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = unescapeHTML(self._og_search_title(webpage))
+ hidden_inputs = self._hidden_inputs(webpage)
+
+ formats = self._extract_wowza_formats(
+ hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil'])
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': hidden_inputs.get('HiddenThumbnail') or self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/voicerepublic.py b/hypervideo_dl/extractor/voicerepublic.py
new file mode 100644
index 0000000..a52e40a
--- /dev/null
+++ b/hypervideo_dl/extractor/voicerepublic.py
@@ -0,0 +1,62 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ int_or_none,
+ urljoin,
+)
+
+
+class VoiceRepublicIE(InfoExtractor):
+ _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
+ _TESTS = [{
+ 'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
+ 'md5': 'b9174d651323f17783000876347116e3',
+ 'info_dict': {
+ 'id': '2296',
+ 'display_id': 'watching-the-watchers-building-a-sousveillance-state',
+ 'ext': 'm4a',
+ 'title': 'Watching the Watchers: Building a Sousveillance State',
+ 'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.',
+ 'duration': 1556,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ if '>Queued for processing, please stand by...<' in webpage:
+ raise ExtractorError(
+ 'Audio is still queued for processing', expected=True)
+
+ talk = self._parse_json(self._search_regex(
+ r'initialSnapshot\s*=\s*({.+?});',
+ webpage, 'talk'), display_id)['talk']
+ title = talk['title']
+ formats = [{
+ 'url': urljoin(url, talk_url),
+ 'format_id': format_id,
+ 'ext': determine_ext(talk_url) or format_id,
+ 'vcodec': 'none',
+ } for format_id, talk_url in talk['media_links'].items()]
+ self._sort_formats(formats)
+
+ return {
+ 'id': compat_str(talk.get('id') or display_id),
+ 'display_id': display_id,
+ 'title': title,
+ 'description': talk.get('teaser'),
+ 'thumbnail': talk.get('image_url'),
+ 'duration': int_or_none(talk.get('archived_duration')),
+ 'view_count': int_or_none(talk.get('play_count')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/voot.py b/hypervideo_dl/extractor/voot.py
new file mode 100644
index 0000000..751b21e
--- /dev/null
+++ b/hypervideo_dl/extractor/voot.py
@@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class VootIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?voot\.com/(?:[^/]+/)+(?P<id>\d+)'
+ _GEO_COUNTRIES = ['IN']
+ _TESTS = [{
+ 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353',
+ 'info_dict': {
+ 'id': '0_8ledb18o',
+ 'ext': 'mp4',
+ 'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340',
+ 'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
+ 'timestamp': 1472162937,
+ 'upload_date': '20160825',
+ 'duration': 1146,
+ 'series': 'Ishq Ka Rang Safed',
+ 'season_number': 1,
+ 'episode': 'Is this the end of Kamini?',
+ 'episode_number': 340,
+ 'view_count': int,
+ 'like_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.voot.com/movies/pandavas-5/424627',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ media_info = self._download_json(
+ 'https://wapi.voot.com/ws/ott/getMediaInfo.json', video_id,
+ query={
+ 'platform': 'Web',
+ 'pId': 2,
+ 'mediaId': video_id,
+ })
+
+ status_code = try_get(media_info, lambda x: x['status']['code'], int)
+ if status_code != 0:
+ raise ExtractorError(media_info['status']['message'], expected=True)
+
+ media = media_info['assets']
+
+ entry_id = media['EntryId']
+ title = media['MediaName']
+ formats = self._extract_m3u8_formats(
+ 'https://cdnapisec.kaltura.com/p/1982551/playManifest/pt/https/f/applehttp/t/web/e/' + entry_id,
+ video_id, 'mp4', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ description, series, season_number, episode, episode_number = [None] * 5
+
+ for meta in try_get(media, lambda x: x['Metas'], list) or []:
+ key, value = meta.get('Key'), meta.get('Value')
+ if not key or not value:
+ continue
+ if key == 'ContentSynopsis':
+ description = value
+ elif key == 'RefSeriesTitle':
+ series = value
+ elif key == 'RefSeriesSeason':
+ season_number = int_or_none(value)
+ elif key == 'EpisodeMainTitle':
+ episode = value
+ elif key == 'EpisodeNo':
+ episode_number = int_or_none(value)
+
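+ # Tag the result with the Kaltura extractor key; the formats above were
+ # already resolved from Kaltura's playManifest endpoint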
+ return {
+ 'extractor_key': 'Kaltura',
+ 'id': entry_id,
+ 'title': title,
+ 'description': description,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'timestamp': unified_timestamp(media.get('CreationDate')),
+ 'duration': int_or_none(media.get('Duration')),
+ 'view_count': int_or_none(media.get('ViewCounter')),
+ 'like_count': int_or_none(media.get('like_counter')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/voxmedia.py b/hypervideo_dl/extractor/voxmedia.py
new file mode 100644
index 0000000..6612081
--- /dev/null
+++ b/hypervideo_dl/extractor/voxmedia.py
@@ -0,0 +1,225 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .once import OnceIE
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class VoxMediaVolumeIE(OnceIE):
+ _VALID_URL = r'https?://volume\.vox-cdn\.com/embed/(?P<id>[0-9a-f]{9})'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ setup = self._parse_json(self._search_regex(
+ r'setup\s*=\s*({.+});', webpage, 'setup'), video_id)
+ player_setup = setup.get('player_setup') or setup
+ video_data = player_setup.get('video') or {}
+ formatted_metadata = video_data.get('formatted_metadata') or {}
+ info = {
+ 'id': video_id,
+ 'title': player_setup.get('title') or video_data.get('title_short'),
+ 'description': video_data.get('description_long') or video_data.get('description_short'),
+ 'thumbnail': formatted_metadata.get('thumbnail') or video_data.get('brightcove_thumbnail'),
+ 'timestamp': unified_timestamp(formatted_metadata.get('video_publish_date')),
+ }
+ asset = try_get(setup, lambda x: x['embed_assets']['chorus'], dict) or {}
+
+ formats = []
+ hls_url = asset.get('hls_url')
+ if hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ mp4_url = asset.get('mp4_url')
+ if mp4_url:
+ tbr = self._search_regex(r'-(\d+)k\.', mp4_url, 'bitrate', default=None)
+ format_id = 'http'
+ if tbr:
+ format_id += '-' + tbr
+ formats.append({
+ 'format_id': format_id,
+ 'url': mp4_url,
+ 'tbr': int_or_none(tbr),
+ })
+ if formats:
+ self._sort_formats(formats)
+ info['formats'] = formats
+ info['duration'] = int_or_none(asset.get('duration'))
+ return info
+
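+        # No direct HLS/MP4 asset was found: fall back to whichever third-party
+        # provider the page references. Brightcove ids resolve through OnceIE
+        # (_extract_once_formats); ooyala and youtube defer to their extractors.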
+ for provider_video_type in ('ooyala', 'youtube', 'brightcove'):
+ provider_video_id = video_data.get('%s_id' % provider_video_type)
+ if not provider_video_id:
+ continue
+ if provider_video_type == 'brightcove':
+ info['formats'] = self._extract_once_formats(provider_video_id)
+ self._sort_formats(info['formats'])
+ else:
+ info.update({
+ '_type': 'url_transparent',
+ 'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id),
+ 'ie_key': provider_video_type.capitalize(),
+ })
+ return info
+ raise ExtractorError('Unable to find provider video id')
+
+
+class VoxMediaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked|funnyordie)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)'
+ _TESTS = [{
+ # Volume embed, Youtube
+ 'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of',
+ 'info_dict': {
+ 'id': 'j4mLW6x17VM',
+ 'ext': 'mp4',
+ 'title': 'Material world: how Google discovered what software is made of',
+ 'description': 'md5:dfc17e7715e3b542d66e33a109861382',
+ 'upload_date': '20190710',
+ 'uploader_id': 'TheVerge',
+ 'uploader': 'The Verge',
+ },
+ 'add_ie': ['Youtube'],
+ }, {
+ # Volume embed, Youtube
+ 'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
+ 'md5': 'fd19aa0cf3a0eea515d4fd5c8c0e9d68',
+ 'info_dict': {
+ 'id': 'Gy8Md3Eky38',
+ 'ext': 'mp4',
+ 'title': 'The Nexus 6: hands-on with Google\'s phablet',
+ 'description': 'md5:d9f0216e5fb932dd2033d6db37ac3f1d',
+ 'uploader_id': 'TheVerge',
+ 'upload_date': '20141021',
+ 'uploader': 'The Verge',
+ 'timestamp': 1413907200,
+ },
+ 'add_ie': ['Youtube'],
+ 'skip': 'similar to the previous test',
+ }, {
+ # Volume embed, Youtube
+ 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
+ 'info_dict': {
+ 'id': '22986359b',
+ 'ext': 'mp4',
+ 'title': "Mississippi's laws are so bad that its anti-LGBTQ law isn't needed to allow discrimination",
+ 'description': 'md5:fc1317922057de31cd74bce91eb1c66c',
+ 'upload_date': '20150915',
+ 'timestamp': 1442332800,
+ 'duration': 285,
+ },
+ 'add_ie': ['Youtube'],
+ 'skip': 'similar to the previous test',
+ }, {
+ # youtube embed
+ 'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
+ 'md5': '83b3080489fb103941e549352d3e0977',
+ 'info_dict': {
+ 'id': 'FcNHTJU1ufM',
+ 'ext': 'mp4',
+ 'title': 'How "the robot" became the greatest novelty dance of all time',
+ 'description': 'md5:b081c0d588b8b2085870cda55e6da176',
+ 'upload_date': '20160324',
+ 'uploader_id': 'voxdotcom',
+ 'uploader': 'Vox',
+ },
+ 'add_ie': ['Youtube'],
+        'skip': 'Page no longer contains videos',
+ }, {
+ # SBN.VideoLinkset.entryGroup multiple ooyala embeds
+ 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
+ 'info_dict': {
+ 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
+ 'title': '25 lies you will tell yourself on National Signing Day',
+ 'description': 'It\'s the most self-delusional time of the year, and everyone\'s gonna tell the same lies together!',
+ },
+ 'playlist': [{
+ 'md5': '721fededf2ab74ae4176c8c8cbfe092e',
+ 'info_dict': {
+ 'id': 'p3cThlMjE61VDi_SD9JlIteSNPWVDBB9',
+ 'ext': 'mp4',
+ 'title': 'Buddy Hield vs Steph Curry (and the world)',
+ 'description': 'Let’s dissect only the most important Final Four storylines.',
+ },
+ }, {
+ 'md5': 'bf0c5cc115636af028be1bab79217ea9',
+ 'info_dict': {
+ 'id': 'BmbmVjMjE6esPHxdALGubTrouQ0jYLHj',
+ 'ext': 'mp4',
+ 'title': 'Chasing Cinderella 2016: Syracuse basketball',
+ 'description': 'md5:e02d56b026d51aa32c010676765a690d',
+ },
+ }],
+        'skip': 'Page no longer contains videos',
+ }, {
+ # volume embed, Brightcove Once
+ 'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya',
+ 'md5': '2dbc77b8b0bff1894c2fce16eded637d',
+ 'info_dict': {
+ 'id': '1231c973d',
+ 'ext': 'mp4',
+ 'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella',
+ 'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.',
+ 'timestamp': 1402938000,
+ 'upload_date': '20140616',
+ 'duration': 4114,
+ },
+ 'add_ie': ['VoxMediaVolume'],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id))
+
+ def create_entry(provider_video_id, provider_video_type, title=None, description=None):
+ video_url = {
+ 'youtube': '%s',
+ 'ooyala': 'ooyala:%s',
+ 'volume': 'http://volume.vox-cdn.com/embed/%s',
+ }[provider_video_type] % provider_video_id
+ return {
+ '_type': 'url_transparent',
+ 'url': video_url,
+ 'title': title or self._og_search_title(webpage),
+ 'description': description or self._og_search_description(webpage),
+ }
+
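+        # Three discovery strategies are tried in turn: the Chorus video-context
+        # JSON blob, a data-ooyala-id attribute and a data-volume-uuid attribute.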
+ entries = []
+ entries_data = self._search_regex([
+ r'Chorus\.VideoContext\.addVideo\((\[{.+}\])\);',
+ r'var\s+entry\s*=\s*({.+});',
+ r'SBN\.VideoLinkset\.entryGroup\(\s*(\[.+\])',
+ ], webpage, 'video data', default=None)
+ if entries_data:
+ entries_data = self._parse_json(entries_data, display_id)
+ if isinstance(entries_data, dict):
+ entries_data = [entries_data]
+ for video_data in entries_data:
+ provider_video_id = video_data.get('provider_video_id')
+ provider_video_type = video_data.get('provider_video_type')
+ if provider_video_id and provider_video_type:
+ entries.append(create_entry(
+ provider_video_id, provider_video_type,
+ video_data.get('title'), video_data.get('description')))
+
+ provider_video_id = self._search_regex(
+ r'data-ooyala-id="([^"]+)"', webpage, 'ooyala id', default=None)
+ if provider_video_id:
+ entries.append(create_entry(provider_video_id, 'ooyala'))
+
+ volume_uuid = self._search_regex(
+ r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None)
+ if volume_uuid:
+ entries.append(create_entry(volume_uuid, 'volume'))
+
+ if len(entries) == 1:
+ return entries[0]
+ else:
+ return self.playlist_result(entries, display_id, self._og_search_title(webpage), self._og_search_description(webpage))
diff --git a/hypervideo_dl/extractor/vrak.py b/hypervideo_dl/extractor/vrak.py
new file mode 100644
index 0000000..daa247c
--- /dev/null
+++ b/hypervideo_dl/extractor/vrak.py
@@ -0,0 +1,80 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
+from ..utils import (
+ int_or_none,
+ parse_age_limit,
+ smuggle_url,
+ unescapeHTML,
+)
+
+
+class VrakIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?\btarget=(?P<id>[\d.]+)'
+ _TEST = {
+ 'url': 'http://www.vrak.tv/videos?target=1.2306782&filtre=emission&id=1.1806721',
+ 'info_dict': {
+ 'id': '5345661243001',
+ 'ext': 'mp4',
+ 'title': 'Obésité, film de hockey et Roseline Filion',
+ 'timestamp': 1488492126,
+ 'upload_date': '20170302',
+ 'uploader_id': '2890187628001',
+ 'creator': 'VRAK.TV',
+ 'age_limit': 8,
+ 'series': 'ALT (Actualité Légèrement Tordue)',
+ 'episode': 'Obésité, film de hockey et Roseline Filion',
+ 'tags': list,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2890187628001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h\d\b[^>]+\bclass=["\']videoTitle["\'][^>]*>([^<]+)',
+ webpage, 'title', default=None) or self._og_search_title(webpage)
+
+ content = self._parse_json(
+ self._search_regex(
+ r'data-player-options-content=(["\'])(?P<content>{.+?})\1',
+ webpage, 'content', default='{}', group='content'),
+ video_id, transform_source=unescapeHTML)
+
+ ref_id = content.get('refId') or self._search_regex(
+ r'refId&quot;:&quot;([^&]+)&quot;', webpage, 'ref id')
+
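+        # The page serializes player attributes as repeated java.lang.String
+        # value pairs; the numeric Brightcove id follows the key
+        # brightcove.article.<n>.<refId> matching the ref id found above.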
+ brightcove_id = self._search_regex(
+ r'''(?x)
+ java\.lang\.String\s+value\s*=\s*["']brightcove\.article\.\d+\.%s
+ [^>]*
+ java\.lang\.String\s+value\s*=\s*["'](\d+)
+ ''' % re.escape(ref_id), webpage, 'brightcove id')
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': BrightcoveNewIE.ie_key(),
+ 'url': smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ {'geo_countries': ['CA']}),
+ 'id': brightcove_id,
+ 'description': content.get('description'),
+ 'creator': content.get('brand'),
+ 'age_limit': parse_age_limit(content.get('rating')),
+            'series': content.get('showName') or content.get(
+                'episodeName'),  # falling back to episodeName is intentional
+ 'season_number': int_or_none(content.get('seasonNumber')),
+ 'episode': title,
+ 'episode_number': int_or_none(content.get('episodeNumber')),
+ 'tags': content.get('tags', []),
+ }
diff --git a/hypervideo_dl/extractor/vrt.py b/hypervideo_dl/extractor/vrt.py
new file mode 100644
index 0000000..4220252
--- /dev/null
+++ b/hypervideo_dl/extractor/vrt.py
@@ -0,0 +1,87 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ float_or_none,
+ get_element_by_class,
+ strip_or_none,
+ unified_timestamp,
+)
+
+
+class VRTIE(InfoExtractor):
+ IE_DESC = 'VRT NWS, Flanders News, Flandern Info and Sporza'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/',
+ 'md5': 'e1663accf5cf13f375f3cd0d10476669',
+ 'info_dict': {
+ 'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd',
+ 'ext': 'mp4',
+ 'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand',
+ 'description': 'Op maandagavond 15 april ging een deel van het dakgebinte van de Parijse kathedraal in vlammen op.',
+ 'timestamp': 1557924660,
+ 'upload_date': '20190515',
+ 'duration': 31.2,
+ },
+ }, {
+ 'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
+ 'md5': '910bba927566e9ab992278f647eb4b75',
+ 'info_dict': {
+ 'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818',
+ 'ext': 'mp4',
+ 'title': 'De Belgian Cats zijn klaar voor het EK mét Ann Wauters',
+ 'timestamp': 1557923760,
+ 'upload_date': '20190515',
+ 'duration': 115.17,
+ },
+ }, {
+ 'url': 'https://www.vrt.be/vrtnws/en/2019/05/15/belgium_s-eurovision-entry-falls-at-the-first-hurdle/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.vrt.be/vrtnws/de/2019/05/15/aus-fuer-eliott-im-halbfinale-des-eurosongfestivals/',
+ 'only_matching': True,
+ }]
+ _CLIENT_MAP = {
+ 'vrt.be/vrtnws': 'vrtnieuws',
+ 'sporza.be': 'sporza',
+ }
+
+ def _real_extract(self, url):
+ site, display_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, display_id)
+ attrs = extract_attributes(self._search_regex(
+ r'(<[^>]+class="vrtvideo"[^>]*>)', webpage, 'vrt video'))
+
+ asset_id = attrs['data-videoid']
+ publication_id = attrs.get('data-publicationid')
+ if publication_id:
+ asset_id = publication_id + '$' + asset_id
+ client = attrs.get('data-client') or self._CLIENT_MAP[site]
+
+ title = strip_or_none(get_element_by_class(
+ 'vrt-title', webpage) or self._html_search_meta(
+ ['og:title', 'twitter:title', 'name'], webpage))
+ description = self._html_search_meta(
+ ['og:description', 'twitter:description', 'description'], webpage)
+ if description == '…':
+ description = None
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'article:published_time', webpage))
+
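+        # Playback is delegated to the Canvas extractor via the mediazone API
+        # URL built from the client name and the (publication-qualified) asset id.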
+ return {
+ '_type': 'url_transparent',
+ 'id': asset_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': attrs.get('data-posterimage'),
+ 'timestamp': timestamp,
+ 'duration': float_or_none(attrs.get('data-duration'), 1000),
+ 'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (client, asset_id),
+ 'ie_key': 'Canvas',
+ }
diff --git a/hypervideo_dl/extractor/vrv.py b/hypervideo_dl/extractor/vrv.py
new file mode 100644
index 0000000..6e51469
--- /dev/null
+++ b/hypervideo_dl/extractor/vrv.py
@@ -0,0 +1,277 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import json
+import hashlib
+import hmac
+import random
+import string
+import time
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_urllib_parse_urlencode,
+ compat_urllib_parse,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+)
+
+
+class VRVBaseIE(InfoExtractor):
+ _API_DOMAIN = None
+ _API_PARAMS = {}
+ _CMS_SIGNING = {}
+ _TOKEN = None
+ _TOKEN_SECRET = ''
+
+ def _call_api(self, path, video_id, note, data=None):
+ # https://tools.ietf.org/html/rfc5849#section-3
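+        # OAuth 1.0 signing: collect the oauth_* parameters (plus the user
+        # token when logged in), build the signature base string
+        # "METHOD&<encoded URL>&<encoded query>" and sign it with HMAC-SHA1
+        # keyed by "<consumer secret>&<token secret>".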
+ base_url = self._API_DOMAIN + '/core/' + path
+ query = [
+ ('oauth_consumer_key', self._API_PARAMS['oAuthKey']),
+ ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])),
+ ('oauth_signature_method', 'HMAC-SHA1'),
+ ('oauth_timestamp', int(time.time())),
+ ]
+ if self._TOKEN:
+ query.append(('oauth_token', self._TOKEN))
+ encoded_query = compat_urllib_parse_urlencode(query)
+ headers = self.geo_verification_headers()
+ if data:
+ data = json.dumps(data).encode()
+ headers['Content-Type'] = 'application/json'
+ base_string = '&'.join([
+ 'POST' if data else 'GET',
+ compat_urllib_parse.quote(base_url, ''),
+ compat_urllib_parse.quote(encoded_query, '')])
+ oauth_signature = base64.b64encode(hmac.new(
+ (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'),
+ base_string.encode(), hashlib.sha1).digest()).decode()
+ encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
+ try:
+ return self._download_json(
+ '?'.join([base_url, encoded_query]), video_id,
+ note='Downloading %s JSON metadata' % note, headers=headers, data=data)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ raise ExtractorError(json.loads(e.cause.read().decode())['message'], expected=True)
+ raise
+
+ def _call_cms(self, path, video_id, note):
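+        # CMS requests must carry signing-policy query parameters; they are
+        # fetched once from the index endpoint (cms_signing, or the /cms/
+        # entries of signing_policies) and cached in _CMS_SIGNING.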
+ if not self._CMS_SIGNING:
+ index = self._call_api('index', video_id, 'CMS Signing')
+ self._CMS_SIGNING = index.get('cms_signing') or {}
+ if not self._CMS_SIGNING:
+ for signing_policy in index.get('signing_policies', []):
+ signing_path = signing_policy.get('path')
+ if signing_path and signing_path.startswith('/cms/'):
+ name, value = signing_policy.get('name'), signing_policy.get('value')
+ if name and value:
+ self._CMS_SIGNING[name] = value
+ return self._download_json(
+ self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
+ note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())
+
+ def _get_cms_resource(self, resource_key, video_id):
+ return self._call_api(
+ 'cms_resource', video_id, 'resource path', data={
+ 'resource_key': resource_key,
+ })['__links__']['cms_resource']['href']
+
+ def _real_initialize(self):
+ webpage = self._download_webpage(
+ 'https://vrv.co/', None, headers=self.geo_verification_headers())
+ self._API_PARAMS = self._parse_json(self._search_regex(
+ [
+ r'window\.__APP_CONFIG__\s*=\s*({.+?})(?:</script>|;)',
+ r'window\.__APP_CONFIG__\s*=\s*({.+})'
+ ], webpage, 'app config'), None)['cxApiParams']
+ self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
+
+
+class VRVIE(VRVBaseIE):
+ IE_NAME = 'vrv'
+ _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
+ 'info_dict': {
+ 'id': 'GR9PNZ396',
+ 'ext': 'mp4',
+ 'title': 'BOSTON: WHERE THE PAST IS THE PRESENT',
+ 'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f',
+ 'uploader_id': 'seeso',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # movie listing
+ 'url': 'https://vrv.co/watch/G6NQXZ1J6/Lily-CAT',
+ 'info_dict': {
+ 'id': 'G6NQXZ1J6',
+ 'title': 'Lily C.A.T',
+ 'description': 'md5:988b031e7809a6aeb60968be4af7db07',
+ },
+ 'playlist_count': 2,
+ }]
+ _NETRC_MACHINE = 'vrv'
+
+ def _real_initialize(self):
+ super(VRVIE, self)._real_initialize()
+
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
+ token_credentials = self._call_api(
+ 'authenticate/by:credentials', None, 'Token Credentials', data={
+ 'email': email,
+ 'password': password,
+ })
+ self._TOKEN = token_credentials['oauth_token']
+ self._TOKEN_SECRET = token_credentials['oauth_token_secret']
+
+ def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
+ if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'):
+ return []
+ stream_id_list = []
+ if audio_lang:
+ stream_id_list.append('audio-%s' % audio_lang)
+ if hardsub_lang:
+ stream_id_list.append('hardsub-%s' % hardsub_lang)
+ format_id = stream_format
+ if stream_id_list:
+ format_id += '-' + '-'.join(stream_id_list)
+ if 'hls' in stream_format:
+ adaptive_formats = self._extract_m3u8_formats(
+ url, video_id, 'mp4', m3u8_id=format_id,
+ note='Downloading %s information' % format_id,
+ fatal=False)
+ elif stream_format == 'dash':
+ adaptive_formats = self._extract_mpd_formats(
+ url, video_id, mpd_id=format_id,
+ note='Downloading %s information' % format_id,
+ fatal=False)
+ if audio_lang:
+ for f in adaptive_formats:
+ if f.get('acodec') != 'none':
+ f['language'] = audio_lang
+ return adaptive_formats
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ object_data = self._call_cms(self._get_cms_resource(
+ 'cms:/objects/' + video_id, video_id), video_id, 'object')['items'][0]
+ resource_path = object_data['__links__']['resource']['href']
+ video_data = self._call_cms(resource_path, video_id, 'video')
+ title = video_data['title']
+ description = video_data.get('description')
+
+ if video_data.get('__class__') == 'movie_listing':
+ items = self._call_cms(
+ video_data['__links__']['movie_listing/movies']['href'],
+ video_id, 'movie listing').get('items') or []
+ if len(items) != 1:
+ entries = []
+ for item in items:
+ item_id = item.get('id')
+ if not item_id:
+ continue
+ entries.append(self.url_result(
+ 'https://vrv.co/watch/' + item_id,
+ self.ie_key(), item_id, item.get('title')))
+ return self.playlist_result(entries, video_id, title, description)
+ video_data = items[0]
+
+ streams_path = video_data['__links__'].get('streams', {}).get('href')
+ if not streams_path:
+ self.raise_login_required()
+ streams_json = self._call_cms(streams_path, video_id, 'streams')
+
+ audio_locale = streams_json.get('audio_locale')
+ formats = []
+ for stream_type, streams in streams_json.get('streams', {}).items():
+ if stream_type in ('adaptive_hls', 'adaptive_dash'):
+ for stream in streams.values():
+ formats.extend(self._extract_vrv_formats(
+ stream.get('url'), video_id, stream_type.split('_')[1],
+ audio_locale, stream.get('hardsub_locale')))
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for k in ('captions', 'subtitles'):
+ for subtitle in streams_json.get(k, {}).values():
+ subtitle_url = subtitle.get('url')
+ if not subtitle_url:
+ continue
+ subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
+ 'url': subtitle_url,
+ 'ext': subtitle.get('format', 'ass'),
+ })
+
+ thumbnails = []
+ for thumbnail in video_data.get('images', {}).get('thumbnails', []):
+ thumbnail_url = thumbnail.get('source')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'description': description,
+ 'duration': float_or_none(video_data.get('duration_ms'), 1000),
+ 'uploader_id': video_data.get('channel_id'),
+ 'series': video_data.get('series_title'),
+ 'season': video_data.get('season_title'),
+ 'season_number': int_or_none(video_data.get('season_number')),
+ 'season_id': video_data.get('season_id'),
+ 'episode': title,
+ 'episode_number': int_or_none(video_data.get('episode_number')),
+ 'episode_id': video_data.get('production_episode_id'),
+ }
+
+
+class VRVSeriesIE(VRVBaseIE):
+ IE_NAME = 'vrv:series'
+ _VALID_URL = r'https?://(?:www\.)?vrv\.co/series/(?P<id>[A-Z0-9]+)'
+ _TEST = {
+ 'url': 'https://vrv.co/series/G68VXG3G6/The-Perfect-Insider',
+ 'info_dict': {
+ 'id': 'G68VXG3G6',
+ },
+ 'playlist_mincount': 11,
+ }
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+
+ seasons_path = self._get_cms_resource(
+ 'cms:/seasons?series_id=' + series_id, series_id)
+ seasons_data = self._call_cms(seasons_path, series_id, 'seasons')
+
+ entries = []
+ for season in seasons_data.get('items', []):
+ episodes_path = season['__links__']['season/episodes']['href']
+ episodes = self._call_cms(episodes_path, series_id, 'episodes')
+ for episode in episodes.get('items', []):
+ episode_id = episode['id']
+ entries.append(self.url_result(
+ 'https://vrv.co/watch/' + episode_id,
+ 'VRV', episode_id, episode.get('title')))
+
+ return self.playlist_result(entries, series_id)
diff --git a/hypervideo_dl/extractor/vshare.py b/hypervideo_dl/extractor/vshare.py
new file mode 100644
index 0000000..c631ac1
--- /dev/null
+++ b/hypervideo_dl/extractor/vshare.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_chr
+from ..utils import (
+ decode_packed_codes,
+ ExtractorError,
+)
+
+
+class VShareIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://vshare.io/d/0f64ce6',
+ 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
+ 'info_dict': {
+ 'id': '0f64ce6',
+ 'title': 'vl14062007715967',
+ 'ext': 'mp4',
+ }
+ }, {
+ 'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
+ webpage)
+
+ def _extract_packed(self, webpage):
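+        # The video sources are hidden in packed JavaScript (eval(function...)):
+        # after unpacking, an array of digits is decoded by subtracting a key
+        # digit from each value and mapping the results to characters, which
+        # yields the HTML5 <source> markup consumed in _real_extract.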
+ packed = self._search_regex(
+ r'(eval\(function.+)', webpage, 'packed code')
+ unpacked = decode_packed_codes(packed)
+ digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
+ digits = [int(digit) for digit in digits.split(',')]
+ key_digit = self._search_regex(
+ r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
+ chars = [compat_chr(d - int(key_digit)) for d in digits]
+ return ''.join(chars)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
+ video_id, headers={'Referer': url})
+
+ title = self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title')
+ title = title.split(' - ')[0]
+
+ error = self._html_search_regex(
+ r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
+ 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
+ info = self._parse_html5_media_entries(
+ url, '<video>%s</video>' % self._extract_packed(webpage),
+ video_id)[0]
+
+ self._sort_formats(info['formats'])
+
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ })
+
+ return info
diff --git a/hypervideo_dl/extractor/vtm.py b/hypervideo_dl/extractor/vtm.py
new file mode 100644
index 0000000..093f1aa
--- /dev/null
+++ b/hypervideo_dl/extractor/vtm.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class VTMIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vtm\.be/([^/?&#]+)~v(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})'
+ _TEST = {
+ 'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1',
+ 'md5': '37dca85fbc3a33f2de28ceb834b071f8',
+ 'info_dict': {
+ 'id': '192445',
+ 'ext': 'mp4',
+ 'title': 'Gast vernielt Genkse hotelkamer',
+ 'timestamp': 1611060180,
+ 'upload_date': '20210119',
+ 'duration': 74,
+ # TODO: fix url _type result processing
+ # 'series': 'Op Interventie',
+ }
+ }
+
+ def _real_extract(self, url):
+ uuid = self._match_id(url)
+ video = self._download_json(
+ 'https://omc4vm23offuhaxx6hekxtzspi.appsync-api.eu-west-1.amazonaws.com/graphql',
+ uuid, query={
+ 'query': '''{
+ getComponent(type: Video, uuid: "%s") {
+ ... on Video {
+ description
+ duration
+ myChannelsVideo
+ program {
+ title
+ }
+ publishedAt
+ title
+ }
+ }
+}''' % uuid,
+ }, headers={
+ 'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e',
+ })['data']['getComponent']
+
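+        # Only metadata comes from the GraphQL endpoint above; playback is
+        # delegated to the Medialaan extractor through the mychannels embed URL.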
+ return {
+ '_type': 'url',
+ 'id': uuid,
+ 'title': video.get('title'),
+ 'url': 'http://mychannels.video/embed/%d' % video['myChannelsVideo'],
+ 'description': video.get('description'),
+ 'timestamp': parse_iso8601(video.get('publishedAt')),
+ 'duration': int_or_none(video.get('duration')),
+ 'series': try_get(video, lambda x: x['program']['title']),
+ 'ie_key': 'Medialaan',
+ }
diff --git a/hypervideo_dl/extractor/vube.py b/hypervideo_dl/extractor/vube.py
new file mode 100644
index 0000000..8ce3a6b
--- /dev/null
+++ b/hypervideo_dl/extractor/vube.py
@@ -0,0 +1,172 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+)
+from ..utils import (
+ int_or_none,
+ ExtractorError,
+)
+
+
+class VubeIE(InfoExtractor):
+ IE_NAME = 'vube'
+ IE_DESC = 'Vube.com'
+ _VALID_URL = r'https?://vube\.com/(?:[^/]+/)+(?P<id>[\da-zA-Z]{10})\b'
+
+ _TESTS = [
+ {
+ 'url': 'http://vube.com/trending/William+Wei/Y8NUZ69Tf7?t=s',
+ 'md5': 'e7aabe1f8f1aa826b9e4735e1f9cee42',
+ 'info_dict': {
+ 'id': 'Y8NUZ69Tf7',
+ 'ext': 'mp4',
+ 'title': 'Best Drummer Ever [HD]',
+ 'description': 'md5:2d63c4b277b85c2277761c2cf7337d71',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'uploader': 'William',
+ 'timestamp': 1406876915,
+ 'upload_date': '20140801',
+ 'duration': 258.051,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'categories': ['amazing', 'hd', 'best drummer ever', 'william wei', 'bucket drumming', 'street drummer', 'epic street drumming'],
+ },
+ 'skip': 'Not accessible from Travis CI server',
+ }, {
+ 'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
+ 'md5': 'db7aba89d4603dadd627e9d1973946fe',
+ 'info_dict': {
+ 'id': 'YL2qNPkqon',
+ 'ext': 'mp4',
+ 'title': 'Chiara Grispo - Price Tag by Jessie J',
+ 'description': 'md5:8ea652a1f36818352428cb5134933313',
+ 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f\.jpg$',
+ 'uploader': 'Chiara.Grispo',
+ 'timestamp': 1388743358,
+ 'upload_date': '20140103',
+ 'duration': 170.56,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],
+ },
+ 'skip': 'Removed due to DMCA',
+ },
+ {
+ 'url': 'http://vube.com/SerainaMusic/my-7-year-old-sister-and-i-singing-alive-by-krewella/UeBhTudbfS?t=s&n=1',
+ 'md5': '5d4a52492d76f72712117ce6b0d98d08',
+ 'info_dict': {
+ 'id': 'UeBhTudbfS',
+ 'ext': 'mp4',
+ 'title': 'My 7 year old Sister and I singing "Alive" by Krewella',
+ 'description': 'md5:40bcacb97796339f1690642c21d56f4a',
+ 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102265d5a9f-0f17-4f6b-5753-adf08484ee1e\.jpg$',
+ 'uploader': 'Seraina',
+ 'timestamp': 1396492438,
+ 'upload_date': '20140403',
+ 'duration': 240.107,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'categories': ['seraina', 'jessica', 'krewella', 'alive'],
+ },
+ 'skip': 'Removed due to DMCA',
+ }, {
+ 'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
+ 'md5': '0584fc13b50f887127d9d1007589d27f',
+ 'info_dict': {
+ 'id': '0nmsMY5vEq',
+ 'ext': 'mp4',
+ 'title': 'Frozen - Let It Go Cover by Siren Gene',
+ 'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.',
+ 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$',
+ 'uploader': 'Siren',
+ 'timestamp': 1395448018,
+ 'upload_date': '20140322',
+ 'duration': 221.788,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],
+ },
+ 'skip': 'Removed due to DMCA',
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ video = self._download_json(
+ 'http://vube.com/t-api/v1/video/%s' % video_id, video_id, 'Downloading video JSON')
+
+ public_id = video['public_id']
+
+ formats = []
+
+ for media in video['media'].get('video', []) + video['media'].get('audio', []):
+ if media['transcoding_status'] != 'processed':
+ continue
+ fmt = {
+ 'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (media['media_resolution_id'], public_id),
+ 'abr': int(media['audio_bitrate']),
+ 'format_id': compat_str(media['media_resolution_id']),
+ }
+ vbr = int(media['video_bitrate'])
+ if vbr:
+ fmt.update({
+ 'vbr': vbr,
+ 'height': int(media['height']),
+ })
+ formats.append(fmt)
+
+        # Report the DMCA takedown before _sort_formats, which raises a
+        # generic "No video formats found" error on an empty list.
+        if not formats and video.get('vst') == 'dmca':
+            raise ExtractorError(
+                'This video has been removed in response to a complaint received under the US Digital Millennium Copyright Act.',
+                expected=True)
+
+        self._sort_formats(formats)
+
+ title = video['title']
+ description = video.get('description')
+ thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
+ uploader = video.get('user_alias') or video.get('channel')
+ timestamp = int_or_none(video.get('upload_time'))
+ duration = video['duration']
+ view_count = video.get('raw_view_count')
+ like_count = video.get('total_likes')
+ dislike_count = video.get('total_hates')
+
+ comments = video.get('comments')
+ comment_count = None
+ if comments is None:
+ comment_data = self._download_json(
+ 'http://vube.com/api/video/%s/comment' % video_id,
+ video_id, 'Downloading video comment JSON', fatal=False)
+ if comment_data is not None:
+ comment_count = int_or_none(comment_data.get('total'))
+ else:
+ comment_count = len(comments)
+
+ categories = [tag['text'] for tag in video['tags']]
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ }
diff --git a/hypervideo_dl/extractor/vuclip.py b/hypervideo_dl/extractor/vuclip.py
new file mode 100644
index 0000000..55e087b
--- /dev/null
+++ b/hypervideo_dl/extractor/vuclip.py
@@ -0,0 +1,70 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ parse_duration,
+ remove_end,
+)
+
+
+class VuClipIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
+
+ _TEST = {
+ 'url': 'http://m.vuclip.com/w?cid=1129900602&bu=8589892792&frm=w&z=34801&op=0&oc=843169247&section=recommend',
+ 'info_dict': {
+ 'id': '1129900602',
+ 'ext': '3gp',
+ 'title': 'Top 10 TV Convicts',
+ 'duration': 733,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
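+        # An interstitial ad page may be served first; follow the link behind
+        # its "No" button to reach the actual video page.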
+ ad_m = re.search(
+ r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage)
+ if ad_m:
+ urlr = compat_urllib_parse_urlparse(url)
+ adfree_url = urlr.scheme + '://' + urlr.netloc + ad_m.group(1)
+ webpage = self._download_webpage(
+            adfree_url, video_id, note='Downloading post-ad page')
+
+ error_msg = self._html_search_regex(
+ r'<p class="message">(.*?)</p>', webpage, 'error message',
+ default=None)
+ if error_msg:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error_msg), expected=True)
+
+        # The site alternates between two page layouts: one links the video
+        # directly next to a play button, the other embeds an HTML5 player
+ video_url = self._search_regex(
+ r'<a[^>]+href="([^"]+)"[^>]*><img[^>]+src="[^"]*/play\.gif',
+ webpage, 'video URL', default=None)
+ if video_url:
+ formats = [{
+ 'url': video_url,
+ }]
+ else:
+ formats = self._parse_html5_media_entries(url, webpage, video_id)[0]['formats']
+
+ title = remove_end(self._html_search_regex(
+ r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip(), ' - Video')
+
+ duration = parse_duration(self._html_search_regex(
+ r'[(>]([0-9]+:[0-9]+)(?:<span|\))', webpage, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'duration': duration,
+ }
diff --git a/hypervideo_dl/extractor/vvvvid.py b/hypervideo_dl/extractor/vvvvid.py
new file mode 100644
index 0000000..bc196f8
--- /dev/null
+++ b/hypervideo_dl/extractor/vvvvid.py
@@ -0,0 +1,284 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+)
+
+
+class VVVVIDIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/'
+ _VALID_URL = r'%s(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' % _VALID_URL_BASE
+ _TESTS = [{
+ # video_type == 'video/vvvvid'
+ 'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
+ 'md5': 'b8d3cecc2e981adc3835adf07f6df91b',
+ 'info_dict': {
+ 'id': '489048',
+ 'ext': 'mp4',
+ 'title': 'Ping Pong',
+ 'duration': 239,
+ 'series': '"Perché dovrei guardarlo?" di Dario Moccia',
+ 'season_id': '437',
+ 'episode': 'Ping Pong',
+ 'episode_number': 1,
+ 'episode_id': '3334',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # video_type == 'video/rcs'
+ 'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
+ 'md5': '33e0edfba720ad73a8782157fdebc648',
+ 'info_dict': {
+ 'id': '482493',
+ 'ext': 'mp4',
+ 'title': 'Episodio 01',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # video_type == 'video/youtube'
+ 'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
+ 'md5': '33e0edfba720ad73a8782157fdebc648',
+ 'info_dict': {
+ 'id': 'RzmFKUDOUgw',
+ 'ext': 'mp4',
+ 'title': 'Trailer',
+ 'upload_date': '20150906',
+ 'description': 'md5:a5e802558d35247fee285875328c0b80',
+ 'uploader_id': 'BandaiVisual',
+ 'uploader': 'BANDAI NAMCO Arts Channel',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
+ 'only_matching': True
+ }]
+ _conn_id = None
+
+ def _real_initialize(self):
+ self._conn_id = self._download_json(
+ 'https://www.vvvvid.it/user/login',
+ None, headers=self.geo_verification_headers())['data']['conn_id']
+
+ def _download_info(self, show_id, path, video_id, fatal=True, query=None):
+ q = {
+ 'conn_id': self._conn_id,
+ }
+ if query:
+ q.update(query)
+ response = self._download_json(
+ 'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path),
+ video_id, headers=self.geo_verification_headers(), query=q, fatal=fatal)
+ if not (response or fatal):
+ return
+ if response.get('result') == 'error':
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, response['message']), expected=True)
+ return response['data']
+
+ def _extract_common_video_info(self, video_data):
+ return {
+ 'thumbnail': video_data.get('thumbnail'),
+ 'episode_id': str_or_none(video_data.get('id')),
+ }
+
+ def _real_extract(self, url):
+ show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
+
+ response = self._download_info(
+ show_id, 'season/%s' % season_id,
+ video_id, query={'video_id': video_id})
+
+ vid = int(video_id)
+ video_data = list(filter(
+ lambda episode: episode.get('video_id') == vid, response))[0]
+ title = video_data['title']
+ formats = []
+
+        # The embed_info decryption algorithm is reverse-engineered from the
+        # function $ds(h) in vvvvid.js
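+        # It maps each character to its index in a custom 64-character
+        # alphabet, reverses an XOR scrambling pass over the indices, then
+        # reassembles the 6-bit values into bytes base64-style (helper f).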
+ def ds(h):
+ g = "MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij"
+
+ def f(m):
+ l = []
+ o = 0
+ b = False
+ m_len = len(m)
+ while ((not b) and o < m_len):
+ n = m[o] << 2
+ o += 1
+ k = -1
+ j = -1
+ if o < m_len:
+ n += m[o] >> 4
+ o += 1
+ if o < m_len:
+ k = (m[o - 1] << 4) & 255
+ k += m[o] >> 2
+ o += 1
+ if o < m_len:
+ j = (m[o - 1] << 6) & 255
+ j += m[o]
+ o += 1
+ else:
+ b = True
+ else:
+ b = True
+ else:
+ b = True
+ l.append(n)
+ if k != -1:
+ l.append(k)
+ if j != -1:
+ l.append(j)
+ return l
+
+ c = []
+ for e in h:
+ c.append(g.index(e))
+
+ c_len = len(c)
+ for e in range(c_len * 2 - 1, -1, -1):
+ a = c[e % c_len] ^ c[(e + 1) % c_len]
+ c[e % c_len] = a
+
+ c = f(c)
+ d = ''
+ for e in c:
+ d += chr(e)
+
+ return d
+
+ info = {}
+
+ def metadata_from_url(r_url):
+ if not info and r_url:
+ mobj = re.search(r'_(?:S(\d+))?Ep(\d+)', r_url)
+ if mobj:
+ info['episode_number'] = int(mobj.group(2))
+ season_number = mobj.group(1)
+ if season_number:
+ info['season_number'] = int(season_number)
+
+ video_type = video_data.get('video_type')
+ is_youtube = False
+ for quality in ('', '_sd'):
+ embed_code = video_data.get('embed_info' + quality)
+ if not embed_code:
+ continue
+ embed_code = ds(embed_code)
+ if video_type == 'video/kenc':
+ embed_code = re.sub(r'https?(://[^/]+)/z/', r'https\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8')
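+                # video/kenc streams additionally need a per-session token from
+                # the /kenc endpoint; its ds-decoded message is appended to the
+                # manifest URL as a query string.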
+ kenc = self._download_json(
+ 'https://www.vvvvid.it/kenc', video_id, query={
+ 'action': 'kt',
+ 'conn_id': self._conn_id,
+ 'url': embed_code,
+ }, fatal=False) or {}
+ kenc_message = kenc.get('message')
+ if kenc_message:
+ embed_code += '?' + ds(kenc_message)
+ formats.extend(self._extract_m3u8_formats(
+ embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ elif video_type == 'video/rcs':
+ formats.extend(self._extract_akamai_formats(embed_code, video_id))
+ elif video_type == 'video/youtube':
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': YoutubeIE.ie_key(),
+ 'url': embed_code,
+ })
+ is_youtube = True
+ break
+ else:
+ formats.extend(self._extract_wowza_formats(
+ 'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
+ metadata_from_url(embed_code)
+
+ if not is_youtube:
+ self._sort_formats(formats)
+ info['formats'] = formats
+
+ metadata_from_url(video_data.get('thumbnail'))
+ info.update(self._extract_common_video_info(video_data))
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'duration': int_or_none(video_data.get('length')),
+ 'series': video_data.get('show_title'),
+ 'season_id': season_id,
+ 'episode': title,
+ 'view_count': int_or_none(video_data.get('views')),
+ 'like_count': int_or_none(video_data.get('video_likes')),
+ 'repost_count': int_or_none(video_data.get('video_shares')),
+ })
+ return info
+
+
+class VVVVIDShowIE(VVVVIDIE):
+ _VALID_URL = r'(?P<base_url>%s(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' % VVVVIDIE._VALID_URL_BASE
+ _TESTS = [{
+ 'url': 'https://www.vvvvid.it/show/156/psyco-pass',
+ 'info_dict': {
+ 'id': '156',
+ 'title': 'Psycho-Pass',
+ 'description': 'md5:94d572c0bd85894b193b8aebc9a3a806',
+ },
+ 'playlist_count': 46,
+ }, {
+ 'url': 'https://www.vvvvid.it/show/156',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ base_url, show_id, show_title = re.match(self._VALID_URL, url).groups()
+
+ seasons = self._download_info(
+ show_id, 'seasons/', show_title)
+
+        show_info = self._download_info(
+            show_id, 'info/', show_title, fatal=False) or {}
+
+        if not show_title:
+            base_url += '/title'
+
+ entries = []
+ for season in (seasons or []):
+ episodes = season.get('episodes') or []
+ playlist_title = season.get('name') or show_info.get('title')
+ for episode in episodes:
+ if episode.get('playable') is False:
+ continue
+ season_id = str_or_none(episode.get('season_id'))
+ video_id = str_or_none(episode.get('video_id'))
+ if not (season_id and video_id):
+ continue
+ info = self._extract_common_video_info(episode)
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': VVVVIDIE.ie_key(),
+ 'url': '/'.join([base_url, season_id, video_id]),
+ 'title': episode.get('title'),
+ 'description': episode.get('description'),
+ 'season_id': season_id,
+ 'playlist_title': playlist_title,
+ })
+ entries.append(info)
+
+ return self.playlist_result(
+ entries, show_id, show_info.get('title'), show_info.get('description'))
diff --git a/hypervideo_dl/extractor/vyborymos.py b/hypervideo_dl/extractor/vyborymos.py
new file mode 100644
index 0000000..9e703c4
--- /dev/null
+++ b/hypervideo_dl/extractor/vyborymos.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+
+
+class VyboryMosIE(InfoExtractor):
+ _VALID_URL = r'https?://vybory\.mos\.ru/(?:#precinct/|account/channels\?.*?\bstation_id=)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://vybory.mos.ru/#precinct/13636',
+ 'info_dict': {
+ 'id': '13636',
+ 'ext': 'mp4',
+ 'title': 're:^Участковая избирательная комиссия №2231 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'Россия, Москва, улица Введенского, 32А',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://vybory.mos.ru/account/channels?station_id=13636',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ station_id = self._match_id(url)
+
+ channels = self._download_json(
+ 'http://vybory.mos.ru/account/channels?station_id=%s' % station_id,
+ station_id, 'Downloading channels JSON')
+
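+        # Each channel entry is a list of [sid, hosts, name, _]; every
+        # camera/host pair gets its own HLS master playlist.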
+ formats = []
+ for cam_num, (sid, hosts, name, _) in enumerate(channels, 1):
+ for num, host in enumerate(hosts, 1):
+ formats.append({
+ 'url': 'http://%s/master.m3u8?sid=%s' % (host, sid),
+ 'ext': 'mp4',
+ 'format_id': 'camera%d-host%d' % (cam_num, num),
+ 'format_note': '%s, %s' % (name, host),
+ })
+
+ info = self._download_json(
+ 'http://vybory.mos.ru/json/voting_stations/%s/%s.json'
+ % (compat_str(station_id)[:3], station_id),
+ station_id, 'Downloading station JSON', fatal=False)
+
+ return {
+ 'id': station_id,
+ 'title': self._live_title(info['name'] if info else station_id),
+ 'description': info.get('address'),
+ 'is_live': True,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/vzaar.py b/hypervideo_dl/extractor/vzaar.py
new file mode 100644
index 0000000..b7d02fc
--- /dev/null
+++ b/hypervideo_dl/extractor/vzaar.py
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ float_or_none,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class VzaarIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
+ _TESTS = [{
+ # HTTP and HLS
+ 'url': 'https://vzaar.com/videos/1152805',
+ 'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
+ 'info_dict': {
+ 'id': '1152805',
+ 'ext': 'mp4',
+ 'title': 'sample video (public)',
+ },
+ }, {
+ 'url': 'https://view.vzaar.com/27272/player',
+ 'md5': '3b50012ac9bbce7f445550d54e0508f2',
+ 'info_dict': {
+ 'id': '27272',
+ 'ext': 'mp3',
+ 'title': 'MP3',
+ },
+ }, {
+ # hlsAes = true
+ 'url': 'https://view.vzaar.com/11379930/player',
+ 'info_dict': {
+ 'id': '11379930',
+ 'ext': 'mp4',
+ 'title': 'Videoaula',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # with null videoTitle
+ 'url': 'https://view.vzaar.com/20313539/download',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
+
+ title = video_data.get('videoTitle') or video_id
+
+ formats = []
+
+ source_url = url_or_none(video_data.get('sourceUrl'))
+ if source_url:
+ f = {
+ 'url': source_url,
+ 'format_id': 'http',
+ 'preference': 1,
+ }
+ if 'audio' in source_url:
+ f.update({
+ 'vcodec': 'none',
+ 'ext': 'mp3',
+ })
+ else:
+ f.update({
+ 'width': int_or_none(video_data.get('width')),
+ 'height': int_or_none(video_data.get('height')),
+ 'ext': 'mp4',
+ 'fps': float_or_none(video_data.get('fps')),
+ })
+ formats.append(f)
+
+ video_guid = video_data.get('guid')
+ usp = video_data.get('usp')
+ if video_data.get('uspEnabled') and isinstance(video_guid, compat_str) and isinstance(usp, dict):
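+            # usp carries signed query parameters that must accompany the .ism
+            # HLS URL; AES-encrypted streams fetch their decryption key from
+            # the 'goose' host variant of the same URL template.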
+ hls_aes = video_data.get('hlsAes')
+ qs = '&'.join('%s=%s' % (k, v) for k, v in usp.items())
+ url_templ = 'http://%%s.vzaar.com/v5/usp%s/%s/%s.ism%%s?' % ('aes' if hls_aes else '', video_guid, video_id)
+ m3u8_formats = self._extract_m3u8_formats(
+ url_templ % ('fable', '/.m3u8') + qs, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ if hls_aes:
+ for f in m3u8_formats:
+ f['_decryption_key_url'] = url_templ % ('goose', '') + qs
+ formats.extend(m3u8_formats)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': self._proto_relative_url(video_data.get('poster')),
+ 'duration': float_or_none(video_data.get('videoDuration')),
+ 'timestamp': unified_timestamp(video_data.get('ts')),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/wakanim.py b/hypervideo_dl/extractor/wakanim.py
new file mode 100644
index 0000000..f9a2395
--- /dev/null
+++ b/hypervideo_dl/extractor/wakanim.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ merge_dicts,
+ urljoin,
+)
+
+
+class WakanimIE(InfoExtractor):
+ _VALID_URL = r'https://(?:www\.)?wakanim\.tv/[^/]+/v2/catalogue/episode/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/2997/the-asterisk-war-omu-staffel-1-episode-02-omu',
+ 'info_dict': {
+ 'id': '2997',
+ 'ext': 'mp4',
+ 'title': 'Episode 02',
+ 'description': 'md5:2927701ea2f7e901de8bfa8d39b2852d',
+ 'series': 'The Asterisk War (OmU.)',
+ 'season_number': 1,
+ 'episode': 'Episode 02',
+ 'episode_number': 2,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ # DRM Protected
+ 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/7843/sword-art-online-alicization-omu-arc-2-folge-15-omu',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ m3u8_url = urljoin(url, self._search_regex(
+ r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 url',
+ group='url'))
+ # https://docs.microsoft.com/en-us/azure/media-services/previous/media-services-content-protection-overview#streaming-urls
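+        # DRM-protected streaming URLs carry an encryption=cenc or
+        # encryption=cbcs-aapl parameter; clear streams omit it.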
+ encryption = self._search_regex(
+ r'encryption%3D(c(?:enc|bc(?:s-aapl)?))',
+ m3u8_url, 'encryption', default=None)
+        if encryption in ('cenc', 'cbcs-aapl'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ title = self._search_regex(
+ (r'<h1[^>]+\bclass=["\']episode_h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'<span[^>]+\bclass=["\']episode_title["\'][^>]*>(?P<title>[^<]+)'),
+ webpage, 'title', default=None, group='title')
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ })
diff --git a/hypervideo_dl/extractor/walla.py b/hypervideo_dl/extractor/walla.py
new file mode 100644
index 0000000..cbb5486
--- /dev/null
+++ b/hypervideo_dl/extractor/walla.py
@@ -0,0 +1,86 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ int_or_none,
+)
+
+
+class WallaIE(InfoExtractor):
+ _VALID_URL = r'https?://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
+ _TEST = {
+ 'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
+ 'info_dict': {
+ 'id': '2642630',
+ 'display_id': 'one-direction-all-for-one',
+ 'ext': 'flv',
+ 'title': 'וואן דיירקשן: ההיסטריה',
+ 'description': 'md5:de9e2512a92442574cdb0913c49bc4d8',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 3600,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }
+
+ _SUBTITLE_LANGS = {
+ 'עברית': 'heb',
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ video = self._download_xml(
+ 'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id,
+ display_id)
+
+ item = video.find('./items/item')
+
+ title = xpath_text(item, './title', 'title')
+ description = xpath_text(item, './synopsis', 'description')
+ thumbnail = xpath_text(item, './preview_pic', 'thumbnail')
+ duration = int_or_none(xpath_text(item, './duration', 'duration'))
+
+ subtitles = {}
+ for subtitle in item.findall('./subtitles/subtitle'):
+ lang = xpath_text(subtitle, './title')
+ subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
+ 'ext': 'srt',
+ 'url': xpath_text(subtitle, './src'),
+ }]
+
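+        # Streams are RTMP: each quality node supplies the play path, and the
+        # format height is parsed from titles such as "480p".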
+ formats = []
+ for quality in item.findall('./qualities/quality'):
+ format_id = xpath_text(quality, './title')
+ fmt = {
+ 'url': 'rtmp://wafla.walla.co.il/vod',
+ 'play_path': xpath_text(quality, './src'),
+ 'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf',
+ 'page_url': url,
+ 'ext': 'flv',
+                'format_id': format_id,
+ }
+ m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
+ if m:
+ fmt['height'] = int(m.group('height'))
+ formats.append(fmt)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/washingtonpost.py b/hypervideo_dl/extractor/washingtonpost.py
new file mode 100644
index 0000000..8afb1af
--- /dev/null
+++ b/hypervideo_dl/extractor/washingtonpost.py
@@ -0,0 +1,116 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class WashingtonPostIE(InfoExtractor):
+ IE_NAME = 'washingtonpost'
+ _VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/(?:video|posttv)/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _EMBED_URL = r'https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
+ _TESTS = [{
+ 'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
+ 'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
+ 'info_dict': {
+ 'id': '480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
+ 'ext': 'mp4',
+ 'title': 'Egypt finds belongings, debris from plane crash',
+ 'description': 'md5:a17ceee432f215a5371388c1f680bd86',
+ 'upload_date': '20160520',
+ 'timestamp': 1463775187,
+ },
+ }, {
+ 'url': 'https://www.washingtonpost.com/video/world/egypt-finds-belongings-debris-from-plane-crash/2016/05/20/480ba4ee-1ec7-11e6-82c2-a7dcb313287d_video.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.washingtonpost.com/posttv/world/iraq-to-track-down-antiquities-after-islamic-state-museum-rampage/2015/02/28/7c57e916-bf86-11e4-9dfb-03366e719af8_video.html',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def _extract_urls(cls, webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\'](%s)' % cls._EMBED_URL, webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'arcpublishing:wapo:' + video_id, 'ArcPublishing', video_id)
+
+
+class WashingtonPostArticleIE(InfoExtractor):
+ IE_NAME = 'washingtonpost:article'
+ _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
+ 'info_dict': {
+ 'id': 'sinkhole-of-bureaucracy',
+ 'title': 'Sinkhole of bureaucracy',
+ },
+ 'playlist': [{
+ 'md5': 'b9be794ceb56c7267d410a13f99d801a',
+ 'info_dict': {
+ 'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
+ 'ext': 'mp4',
+ 'title': 'Breaking Points: The Paper Mine',
+ 'duration': 1290,
+ 'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
+ 'timestamp': 1395440416,
+ 'upload_date': '20140321',
+ },
+ }, {
+ 'md5': '1fff6a689d8770966df78c8cb6c8c17c',
+ 'info_dict': {
+ 'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
+ 'ext': 'mp4',
+ 'title': 'The town bureaucracy sustains',
+ 'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
+ 'duration': 2220,
+ 'timestamp': 1395441819,
+ 'upload_date': '20140321',
+ },
+ }],
+ }, {
+ 'url': 'http://www.washingtonpost.com/blogs/wonkblog/wp/2014/12/31/one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear/',
+ 'info_dict': {
+ 'id': 'one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear',
+ 'title': 'One airline figured out how to make sure its airplanes never disappear',
+ },
+ 'playlist': [{
+ 'md5': 'a7c1b5634ba5e57a6a82cdffa5b1e0d0',
+ 'info_dict': {
+ 'id': '0e4bb54c-9065-11e4-a66f-0ca5037a597d',
+ 'ext': 'mp4',
+ 'description': 'Washington Post transportation reporter Ashley Halsey III explains why a plane\'s black box needs to be recovered from a crash site instead of having its information streamed in real time throughout the flight.',
+ 'upload_date': '20141230',
+ 'timestamp': 1419972442,
+ 'title': 'Why black boxes don’t transmit data in real time',
+ }
+ }]
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if WashingtonPostIE.suitable(url) else super(WashingtonPostArticleIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+
+ title = self._og_search_title(webpage)
+
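+        # Article pages embed one or more video UUIDs via data attributes;
+        # each one is handed off to WashingtonPostIE as a washingtonpost: URL.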
+ uuids = re.findall(r'''(?x)
+ (?:
+ <div\s+class="posttv-video-embed[^>]*?data-uuid=|
+ data-video-uuid=
+ )"([^"]+)"''', webpage)
+ entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'id': page_id,
+ 'title': title,
+ }
diff --git a/hypervideo_dl/extractor/wat.py b/hypervideo_dl/extractor/wat.py
new file mode 100644
index 0000000..f1bccc2
--- /dev/null
+++ b/hypervideo_dl/extractor/wat.py
@@ -0,0 +1,106 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+ unified_strdate,
+)
+
+
+class WatIE(InfoExtractor):
+ _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
+ IE_NAME = 'wat.tv'
+ _TESTS = [
+ {
+ 'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
+ 'info_dict': {
+ 'id': '11713067',
+ 'ext': 'mp4',
+ 'title': 'Soupe de figues à l\'orange et aux épices',
+ 'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
+ 'upload_date': '20140819',
+ 'duration': 120,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['HTTP Error 404'],
+ 'skip': 'This content is no longer available',
+ },
+ {
+ 'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
+ 'md5': 'b16574df2c3cd1a36ca0098f2a791925',
+ 'info_dict': {
+ 'id': '11713075',
+ 'ext': 'mp4',
+ 'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
+ 'upload_date': '20140816',
+ },
+ 'expected_warnings': ["Ce contenu n'est pas disponible pour l'instant."],
+ 'skip': 'This content is no longer available',
+ },
+ ]
+ _GEO_BYPASS = False
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
+
+ # 'contentv4' is used on the website, but it also returns the related
+ # videos, which we don't need
+ # video_data = self._download_json(
+ # 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
+ video_data = self._download_json(
+ 'https://mediainfo.tf1.fr/mediainfocombo/' + video_id,
+ video_id, query={'context': 'MYTF1'})
+ video_info = video_data['media']
+
+ error_desc = video_info.get('error_desc')
+ if error_desc:
+ if video_info.get('error_code') == 'GEOBLOCKED':
+ self.raise_geo_restricted(error_desc, video_info.get('geoList'))
+ raise ExtractorError(error_desc, expected=True)
+
+ title = video_info['title']
+
+ formats = []
+
+ def extract_formats(manifest_urls):
+ for f, f_url in manifest_urls.items():
+ if not f_url:
+ continue
+ if f in ('dash', 'mpd'):
+ formats.extend(self._extract_mpd_formats(
+ f_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
+ video_id, mpd_id='dash', fatal=False))
+ elif f == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ f_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ delivery = video_data.get('delivery') or {}
+ extract_formats({delivery.get('format'): delivery.get('url')})
+ if not formats:
+ if delivery.get('drm'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+ manifest_urls = self._download_json(
+ 'http://www.wat.tv/get/webhtml/' + video_id, video_id, fatal=False)
+ if manifest_urls:
+ extract_formats(manifest_urls)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': video_info.get('preview'),
+ 'upload_date': unified_strdate(try_get(
+ video_data, lambda x: x['mediametrie']['chapters'][0]['estatS4'])),
+ 'duration': int_or_none(video_info.get('duration')),
+ 'formats': formats,
+ }
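
The ID normalisation at the top of _real_extract is the one non-obvious step here: wat.tv page URLs end in a short base-36 token, while the mediainfo API expects the decimal form. A worked instance, using the token from the first test URL (tokens that are already all digits and longer than six characters are passed through unchanged):

    token = '6z1uz'  # from .../soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html
    print(str(int(token, 36)))  # -> '11713067', the id asserted in the test
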
diff --git a/hypervideo_dl/extractor/watchbox.py b/hypervideo_dl/extractor/watchbox.py
new file mode 100644
index 0000000..5a4e46e
--- /dev/null
+++ b/hypervideo_dl/extractor/watchbox.py
@@ -0,0 +1,161 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ strip_or_none,
+ try_get,
+ unescapeHTML,
+ unified_timestamp,
+)
+
+
+class WatchBoxIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?watchbox\.de/(?P<kind>serien|filme)/(?:[^/]+/)*[^/]+-(?P<id>\d+)'
+ _TESTS = [{
+ # film
+ 'url': 'https://www.watchbox.de/filme/free-jimmy-12325.html',
+ 'info_dict': {
+ 'id': '341368',
+ 'ext': 'mp4',
+ 'title': 'Free Jimmy',
+ 'description': 'md5:bcd8bafbbf9dc0ef98063d344d7cc5f6',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 4890,
+ 'age_limit': 16,
+ 'release_year': 2009,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ # episode
+ 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-1/date-in-der-hoelle-328286.html',
+ 'info_dict': {
+ 'id': '328286',
+ 'ext': 'mp4',
+ 'title': 'S01 E01 - Date in der Hölle',
+ 'description': 'md5:2f31c74a8186899f33cb5114491dae2b',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1291,
+ 'age_limit': 12,
+ 'release_year': 2010,
+ 'series': 'Ugly Americans',
+ 'season_number': 1,
+ 'episode': 'Date in der Hölle',
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-2/der-ring-des-powers-328270',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ kind, video_id = mobj.group('kind', 'id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ player_config = self._parse_json(
+ self._search_regex(
+ r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage,
+ 'player config', default='{}', group='data'),
+ video_id, transform_source=unescapeHTML, fatal=False)
+
+ if not player_config:
+ player_config = self._parse_json(
+ self._search_regex(
+ r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
+ default='{}'),
+ video_id, transform_source=js_to_json, fatal=False) or {}
+
+ source = player_config.get('source') or {}
+
+ video_id = compat_str(source.get('videoId') or video_id)
+
+ devapi = self._download_json(
+ 'http://api.watchbox.de/devapi/id/%s' % video_id, video_id, query={
+ 'format': 'json',
+ 'apikey': 'hbbtv',
+ }, fatal=False)
+
+ item = try_get(devapi, lambda x: x['items'][0], dict) or {}
+
+ title = item.get('title') or try_get(
+ item, lambda x: x['movie']['headline_movie'],
+ compat_str) or source['title']
+
+ formats = []
+ hls_url = item.get('media_videourl_hls') or source.get('hls')
+ if hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ dash_url = item.get('media_videourl_wv') or source.get('dash')
+ if dash_url:
+ formats.extend(self._extract_mpd_formats(
+ dash_url, video_id, mpd_id='dash', fatal=False))
+ mp4_url = item.get('media_videourl')
+ if mp4_url:
+ formats.append({
+ 'url': mp4_url,
+ 'format_id': 'mp4',
+ 'width': int_or_none(item.get('width')),
+ 'height': int_or_none(item.get('height')),
+ 'tbr': int_or_none(item.get('bitrate')),
+ })
+ self._sort_formats(formats)
+
+ description = strip_or_none(item.get('descr'))
+ thumbnail = item.get('media_content_thumbnail_large') or source.get('poster') or item.get('media_thumbnail')
+ duration = int_or_none(item.get('media_length') or source.get('length'))
+ timestamp = unified_timestamp(item.get('pubDate'))
+ view_count = int_or_none(item.get('media_views'))
+ age_limit = int_or_none(try_get(item, lambda x: x['movie']['fsk']))
+ release_year = int_or_none(try_get(item, lambda x: x['movie']['rel_year']))
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'age_limit': age_limit,
+ 'release_year': release_year,
+ 'formats': formats,
+ }
+
+ if kind.lower() == 'serien':
+ series = try_get(
+ item, lambda x: x['special']['title'],
+ compat_str) or source.get('format')
+ season_number = int_or_none(self._search_regex(
+ r'^S(\d{1,2})\s*E\d{1,2}', title, 'season number',
+ default=None) or self._search_regex(
+ r'/staffel-(\d+)/', url, 'season number', default=None))
+ episode = source.get('title')
+ episode_number = int_or_none(self._search_regex(
+ r'^S\d{1,2}\s*E(\d{1,2})', title, 'episode number',
+ default=None))
+ info.update({
+ 'series': series,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ })
+
+ return info
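
The player configuration WatchBoxIE reads first is HTML-escaped JSON stored in a data-player-conf attribute, with an inline playerConf object as fallback. A sketch of the first path on invented markup, using Python 3's stdlib html.unescape where the extractor uses unescapeHTML:

    import html
    import json
    import re

    webpage = ('<div data-player-conf="{&quot;source&quot;:'
               ' {&quot;videoId&quot;: 341368}}"></div>')
    raw = re.search(
        r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage).group('data')
    player_config = json.loads(html.unescape(raw))
    print(player_config['source']['videoId'])  # -> 341368
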
diff --git a/hypervideo_dl/extractor/watchindianporn.py b/hypervideo_dl/extractor/watchindianporn.py
new file mode 100644
index 0000000..fadc539
--- /dev/null
+++ b/hypervideo_dl/extractor/watchindianporn.py
@@ -0,0 +1,68 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+)
+
+
+class WatchIndianPornIE(InfoExtractor):
+ IE_DESC = 'Watch Indian Porn'
+ _VALID_URL = r'https?://(?:www\.)?watchindianporn\.net/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
+ _TEST = {
+ 'url': 'http://www.watchindianporn.net/video/hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera-RZa2avywNPa.html',
+ 'md5': '249589a164dde236ec65832bfce17440',
+ 'info_dict': {
+ 'id': 'RZa2avywNPa',
+ 'display_id': 'hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera',
+ 'ext': 'mp4',
+ 'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 226,
+ 'view_count': int,
+ 'categories': list,
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
+
+ title = self._html_search_regex((
+ r'<title>(.+?)\s*-\s*Indian\s+Porn</title>',
+ r'<h4>(.+?)</h4>'
+ ), webpage, 'title')
+
+ duration = parse_duration(self._search_regex(
+ r'Time:\s*<strong>\s*(.+?)\s*</strong>',
+ webpage, 'duration', fatal=False))
+
+ view_count = int_or_none(self._search_regex(
+ r'(?s)Time:\s*<strong>.*?</strong>.*?<strong>\s*(\d+)\s*</strong>',
+ webpage, 'view count', fatal=False))
+
+ categories = re.findall(
+ r'<a[^>]+class=[\'"]categories[\'"][^>]*>\s*([^<]+)\s*</a>',
+ webpage)
+
+ info_dict.update({
+ 'id': video_id,
+ 'display_id': display_id,
+ 'http_headers': {
+ 'Referer': url,
+ },
+ 'title': title,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'categories': categories,
+ 'age_limit': 18,
+ })
+
+ return info_dict
diff --git a/hypervideo_dl/extractor/wdr.py b/hypervideo_dl/extractor/wdr.py
new file mode 100644
index 0000000..2903d18
--- /dev/null
+++ b/hypervideo_dl/extractor/wdr.py
@@ -0,0 +1,347 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ js_to_json,
+ strip_jsonp,
+ try_get,
+ unified_strdate,
+ update_url_query,
+ url_or_none,
+ urlhandle_detect_ext,
+)
+
+
+class WDRIE(InfoExtractor):
+ _VALID_URL = r'https?://deviceids-medp\.wdr\.de/ondemand/\d+/(?P<id>\d+)\.js'
+ _GEO_COUNTRIES = ['DE']
+ _TEST = {
+ 'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js',
+ 'info_dict': {
+ 'id': 'mdb-1557833',
+ 'ext': 'mp4',
+ 'title': 'Biathlon-Staffel verpasst Podest bei Olympia-Generalprobe',
+ 'upload_date': '20180112',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ metadata = self._download_json(
+ url, video_id, transform_source=strip_jsonp)
+
+ is_live = metadata.get('mediaType') == 'live'
+
+ tracker_data = metadata['trackerData']
+ title = tracker_data['trackerClipTitle']
+
+ media_resource = metadata['mediaResource']
+
+ formats = []
+
+ # check if the metadata contains a direct URL to a file
+ for kind, media in media_resource.items():
+ if not isinstance(media, dict):
+ continue
+ if kind not in ('dflt', 'alt'):
+ continue
+
+ for tag_name, medium_url in media.items():
+ if tag_name not in ('videoURL', 'audioURL'):
+ continue
+
+ ext = determine_ext(medium_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ medium_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls'))
+ elif ext == 'f4m':
+ manifest_url = update_url_query(
+ medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'})
+ formats.extend(self._extract_f4m_formats(
+ manifest_url, video_id, f4m_id='hds', fatal=False))
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ medium_url, 'stream', fatal=False))
+ else:
+ a_format = {
+ 'url': medium_url
+ }
+ if ext == 'unknown_video':
+ urlh = self._request_webpage(
+ medium_url, video_id, note='Determining extension')
+ ext = urlhandle_detect_ext(urlh)
+ a_format['ext'] = ext
+ formats.append(a_format)
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ caption_url = media_resource.get('captionURL')
+ if caption_url:
+ subtitles['de'] = [{
+ 'url': caption_url,
+ 'ext': 'ttml',
+ }]
+ captions_hash = media_resource.get('captionsHash')
+ if isinstance(captions_hash, dict):
+ for ext, format_url in captions_hash.items():
+ format_url = url_or_none(format_url)
+ if not format_url:
+ continue
+ subtitles.setdefault('de', []).append({
+ 'url': format_url,
+ 'ext': determine_ext(format_url, None) or ext,
+ })
+
+ return {
+ 'id': tracker_data.get('trackerClipId', video_id),
+ 'title': self._live_title(title) if is_live else title,
+ 'alt_title': tracker_data.get('trackerClipSubcategory'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'upload_date': unified_strdate(tracker_data.get('trackerClipAirTime')),
+ 'is_live': is_live,
+ }
+
+
+class WDRPageIE(InfoExtractor):
+ _CURRENT_MAUS_URL = r'https?://(?:www\.)?wdrmaus\.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
+ _PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
+ _VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
+
+ _TESTS = [
+ {
+ 'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html',
+ # HDS download, MD5 is unstable
+ 'info_dict': {
+ 'id': 'mdb-1058683',
+ 'ext': 'flv',
+ 'display_id': 'doku-am-freitag/video-geheimnis-aachener-dom-100',
+ 'title': 'Geheimnis Aachener Dom',
+ 'alt_title': 'Doku am Freitag',
+ 'upload_date': '20160304',
+ 'description': 'md5:87be8ff14d8dfd7a7ee46f0299b52318',
+ 'is_live': False,
+ 'subtitles': {'de': [{
+ 'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml',
+ 'ext': 'ttml',
+ }]},
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ },
+ {
+ 'url': 'http://www1.wdr.de/mediathek/audio/wdr3/wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100.html',
+ 'md5': 'f4c1f96d01cf285240f53ea4309663d8',
+ 'info_dict': {
+ 'id': 'mdb-1072000',
+ 'ext': 'mp3',
+ 'display_id': 'wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100',
+ 'title': 'Schriftstellerin Juli Zeh',
+ 'alt_title': 'WDR 3 Gespräch am Samstag',
+ 'upload_date': '20160312',
+ 'description': 'md5:e127d320bc2b1f149be697ce044a3dd7',
+ 'is_live': False,
+ 'subtitles': {}
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ },
+ {
+ 'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
+ 'info_dict': {
+ 'id': 'mdb-1406149',
+ 'ext': 'mp4',
+ 'title': r're:^WDR Fernsehen im Livestream \(nur in Deutschland erreichbar\) [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'alt_title': 'WDR Fernsehen Live',
+ 'upload_date': '20150101',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ },
+ {
+ 'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
+ 'playlist_mincount': 7,
+ 'info_dict': {
+ 'id': 'aktuelle-stunde-120',
+ },
+ },
+ {
+ 'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
+ 'info_dict': {
+ 'id': 'mdb-1552552',
+ 'ext': 'mp4',
+ 'upload_date': 're:^[0-9]{8}$',
+ 'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$',
+ },
+ 'skip': 'The id changes from week to week because of the new episode'
+ },
+ {
+ 'url': 'http://www.wdrmaus.de/filme/sachgeschichten/achterbahn.php5',
+ 'md5': '803138901f6368ee497b4d195bb164f2',
+ 'info_dict': {
+ 'id': 'mdb-186083',
+ 'ext': 'mp4',
+ 'upload_date': '20130919',
+ 'title': 'Sachgeschichte - Achterbahn ',
+ },
+ },
+ {
+ 'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html',
+ # Live stream, MD5 unstable
+ 'info_dict': {
+ 'id': 'mdb-869971',
+ 'ext': 'mp4',
+ 'title': r're:^COSMO Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'upload_date': '20160101',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ }
+ },
+ {
+ 'url': 'http://www.sportschau.de/handballem2018/handball-nationalmannschaft-em-stolperstein-vorrunde-100.html',
+ 'info_dict': {
+ 'id': 'mdb-1556012',
+ 'ext': 'mp4',
+ 'title': 'DHB-Vizepräsident Bob Hanning - "Die Weltspitze ist extrem breit"',
+ 'upload_date': '20180111',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://kinder.wdr.de/tv/die-sendung-mit-dem-elefanten/av/video-folge---astronaut-100.html',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('display_id')
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
+
+ # Article with several videos
+
+ # for wdr.de the data-extension is in a tag with the class "mediaLink"
+ # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
+ # for wdrmaus, in a tag with the class "videoButton" (previously a link
+ # to the page in a multiline "videoLink"-tag)
+ for mobj in re.finditer(
+ r'''(?sx)class=
+ (?:
+ (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
+ (["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
+ )data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
+ ''', webpage):
+ media_link_obj = self._parse_json(
+ mobj.group('data'), display_id, transform_source=js_to_json,
+ fatal=False)
+ if not media_link_obj:
+ continue
+ jsonp_url = try_get(
+ media_link_obj, lambda x: x['mediaObj']['url'], compat_str)
+ if jsonp_url:
+ entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key()))
+
+ # Playlist (e.g. https://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html)
+ if not entries:
+ entries = [
+ self.url_result(
+ compat_urlparse.urljoin(url, mobj.group('href')),
+ ie=WDRPageIE.ie_key())
+ for mobj in re.finditer(
+ r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension=',
+ webpage) if re.match(self._PAGE_REGEX, mobj.group('href'))
+ ]
+
+ return self.playlist_result(entries, playlist_id=display_id)
+
+
+class WDRElefantIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?wdrmaus\.de/elefantenseite/#(?P<id>.+)'
+ _TEST = {
+ 'url': 'http://www.wdrmaus.de/elefantenseite/#folge_ostern_2015',
+ 'info_dict': {
+ 'title': 'Folge Oster-Spezial 2015',
+ 'id': 'mdb-1088195',
+ 'ext': 'mp4',
+ 'age_limit': None,
+ 'upload_date': '20150406'
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ # Table of Contents seems to always be at this address, so fetch it directly.
+ # The website fetches configurationJS.php5, which links to tableOfContentsJS.php5.
+ table_of_contents = self._download_json(
+ 'https://www.wdrmaus.de/elefantenseite/data/tableOfContentsJS.php5',
+ display_id)
+ if display_id not in table_of_contents:
+ raise ExtractorError(
+ 'No entry in site\'s table of contents for this URL. '
+ 'Is the fragment part of the URL (after the #) correct?',
+ expected=True)
+ xml_metadata_path = table_of_contents[display_id]['xmlPath']
+ xml_metadata = self._download_xml(
+ 'https://www.wdrmaus.de/elefantenseite/' + xml_metadata_path,
+ display_id)
+ zmdb_url_element = xml_metadata.find('./movie/zmdb_url')
+ if zmdb_url_element is None:
+ raise ExtractorError(
+ '%s is not a video' % display_id, expected=True)
+ return self.url_result(zmdb_url_element.text, ie=WDRIE.ie_key())
+
+
+class WDRMobileIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://mobile-ondemand\.wdr\.de/
+ .*?/fsk(?P<age_limit>[0-9]+)
+ /[0-9]+/[0-9]+/
+ (?P<id>[0-9]+)_(?P<title>[0-9]+)'''
+ IE_NAME = 'wdr:mobile'
+ _TEST = {
+ 'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
+ 'info_dict': {
+ 'title': '4283021',
+ 'id': '421735',
+ 'ext': 'mp4',
+ 'age_limit': 0,
+ },
+ 'skip': 'Problems with loading data.'
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ return {
+ 'id': mobj.group('id'),
+ 'title': mobj.group('title'),
+ 'age_limit': int(mobj.group('age_limit')),
+ 'url': url,
+ 'http_headers': {
+ 'User-Agent': 'mobile',
+ },
+ }
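
WDRIE fetches a .js document and strips the JSONP wrapper with strip_jsonp before parsing. A rough standalone equivalent of that unwrapping; the wrapper name and the miniature payload are invented for illustration:

    import json
    import re

    jsonp = ('someJsonpCallback({"mediaType": "vod",'
             ' "trackerData": {"trackerClipTitle": "Example"}}, "extra");')
    payload = re.search(r'\(\s*({.+})\s*,', jsonp).group(1)
    metadata = json.loads(payload)
    print(metadata['trackerData']['trackerClipTitle'])  # -> 'Example'
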
diff --git a/hypervideo_dl/extractor/webcaster.py b/hypervideo_dl/extractor/webcaster.py
new file mode 100644
index 0000000..e4b65f5
--- /dev/null
+++ b/hypervideo_dl/extractor/webcaster.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ xpath_text,
+)
+
+
+class WebcasterIE(InfoExtractor):
+ _VALID_URL = r'https?://bl\.webcaster\.pro/(?:quote|media)/start/free_(?P<id>[^/]+)'
+ _TESTS = [{
+ # http://video.khl.ru/quotes/393859
+ 'url': 'http://bl.webcaster.pro/quote/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104?sr%3D105%26fa%3D1%26type_id%3D18',
+ 'md5': '0c162f67443f30916ff1c89425dcd4cd',
+ 'info_dict': {
+ 'id': 'c8cefd240aa593681c8d068cff59f407_hd',
+ 'ext': 'mp4',
+ 'title': 'Сибирь - Нефтехимик. Лучшие моменты первого периода',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://bl.webcaster.pro/media/start/free_6246c7a4453ac4c42b4398f840d13100_hd/2_2991109016/e8d0d82587ef435480118f9f9c41db41/4635726126',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_xml(url, video_id)
+
+ title = xpath_text(video, './/event_name', 'event name', fatal=True)
+
+ def make_id(parts, separator):
+ return separator.join(filter(None, parts))
+
+ formats = []
+ for format_id in (None, 'noise'):
+ track_tag = make_id(('track', format_id), '_')
+ for track in video.findall('.//iphone/%s' % track_tag):
+ track_url = track.text
+ if not track_url:
+ continue
+ if determine_ext(track_url) == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ track_url, video_id, 'mp4',
+ entry_protocol='m3u8_native',
+ m3u8_id=make_id(('hls', format_id), '-'), fatal=False)
+ for f in m3u8_formats:
+ f.update({
+ 'source_preference': 0 if format_id == 'noise' else 1,
+ 'format_note': track.get('title'),
+ })
+ formats.extend(m3u8_formats)
+ self._sort_formats(formats)
+
+ thumbnail = xpath_text(video, './/image', 'thumbnail')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
+
+
+class WebcasterFeedIE(InfoExtractor):
+ _VALID_URL = r'https?://bl\.webcaster\.pro/feed/start/free_(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://bl.webcaster.pro/feed/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104',
+ 'only_matching': True,
+ }
+
+ @staticmethod
+ def _extract_url(ie, webpage):
+ mobj = re.search(
+ r'<(?:object|a[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_.*?)(?:[?&]|\1)',
+ webpage)
+ if mobj:
+ return mobj.group('url')
+ for secure in (True, False):
+ video_url = ie._og_search_video_url(
+ webpage, secure=secure, default=None)
+ if video_url:
+ mobj = re.search(
+ r'config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_[^?&=]+)',
+ video_url)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ feed = self._download_xml(url, video_id)
+
+ video_url = xpath_text(
+ feed, ('video_hd', 'video'), 'video url', fatal=True)
+
+ return self.url_result(video_url, WebcasterIE.ie_key())
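
The playlist document here is XML: plain tracks sit under <track> and ambient-sound variants under <track_noise>, which is what the make_id helper toggles between. A sketch of that lookup on an invented miniature document:

    import xml.etree.ElementTree as ET

    video = ET.fromstring('''
    <video>
      <event_name>Example event</event_name>
      <iphone>
        <track title="main">http://example.com/main.m3u8</track>
        <track_noise title="ambient">http://example.com/noise.m3u8</track_noise>
      </iphone>
    </video>''')

    def make_id(parts, separator):
        return separator.join(filter(None, parts))

    for format_id in (None, 'noise'):
        track_tag = make_id(('track', format_id), '_')  # 'track', 'track_noise'
        for track in video.findall('.//iphone/%s' % track_tag):
            print(make_id(('hls', format_id), '-'), track.get('title'), track.text)
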
diff --git a/hypervideo_dl/extractor/webofstories.py b/hypervideo_dl/extractor/webofstories.py
new file mode 100644
index 0000000..f2b8d19
--- /dev/null
+++ b/hypervideo_dl/extractor/webofstories.py
@@ -0,0 +1,160 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ orderedSet,
+)
+
+
+class WebOfStoriesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)'
+ _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
+ _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
+ _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
+ _TESTS = [{
+ 'url': 'http://www.webofstories.com/play/hans.bethe/71',
+ 'md5': '373e4dd915f60cfe3116322642ddf364',
+ 'info_dict': {
+ 'id': '4536',
+ 'ext': 'mp4',
+ 'title': 'The temperature of the sun',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Hans Bethe talks about calculating the temperature of the sun',
+ 'duration': 238,
+ }
+ }, {
+ 'url': 'http://www.webofstories.com/play/55908',
+ 'md5': '2985a698e1fe3211022422c4b5ed962c',
+ 'info_dict': {
+ 'id': '55908',
+ 'ext': 'mp4',
+ 'title': 'The story of Gemmata obscuriglobus',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
+ 'duration': 169,
+ },
+ 'skip': 'notfound',
+ }, {
+ # malformed og:title meta
+ 'url': 'http://www.webofstories.com/play/54215?o=MS',
+ 'info_dict': {
+ 'id': '54215',
+ 'ext': 'mp4',
+ 'title': '"A Leg to Stand On"',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Oliver Sacks talks about the death and resurrection of a limb',
+ 'duration': 97,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ # Sometimes og:title meta is malformed
+ title = self._og_search_title(webpage, default=None) or self._html_search_regex(
+ r'(?s)<strong>Title:\s*</strong>(.+?)<', webpage, 'title')
+ description = self._html_search_meta('description', webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
+ r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
+ webpage, 'embed params').split(',')]
+
+ (
+ _, speaker_id, story_id, story_duration,
+ speaker_type, great_life, _thumbnail, _has_subtitles,
+ story_filename, _story_order) = embed_params
+
+ is_great_life_series = great_life == 'true'
+ duration = int_or_none(story_duration)
+
+ # URL building, see: http://www.webofstories.com/scripts/player.js
+ ms_prefix = ''
+ if speaker_type.lower() == 'ms':
+ ms_prefix = 'mini_sites/'
+
+ if is_great_life_series:
+ mp4_url = '{0}lives/{1}/{2}.mp4'.format(
+ self._VIDEO_DOMAIN, speaker_id, story_filename)
+ rtmp_ext = 'flv'
+ streamer = self._GREAT_LIFE_STREAMER
+ play_path = 'stories/{0}/{1}'.format(
+ speaker_id, story_filename)
+ else:
+ mp4_url = '{0}{1}{2}/{3}.mp4'.format(
+ self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename)
+ rtmp_ext = 'mp4'
+ streamer = self._USER_STREAMER
+ play_path = 'mp4:{0}{1}/{2}.mp4'.format(
+ ms_prefix, speaker_id, story_filename)
+
+ formats = [{
+ 'format_id': 'mp4_sd',
+ 'url': mp4_url,
+ }, {
+ 'format_id': 'rtmp_sd',
+ 'page_url': url,
+ 'url': streamer,
+ 'ext': rtmp_ext,
+ 'play_path': play_path,
+ }]
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': story_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ 'duration': duration,
+ }
+
+
+class WebOfStoriesPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?webofstories\.com/playAll/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://www.webofstories.com/playAll/donald.knuth',
+ 'info_dict': {
+ 'id': 'donald.knuth',
+ 'title': 'Donald Knuth (Scientist)',
+ },
+ 'playlist_mincount': 97,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result(
+ 'http://www.webofstories.com/play/%s' % video_id,
+ 'WebOfStories', video_id=video_id)
+ for video_id in orderedSet(re.findall(r'\bid=["\']td_(\d+)', webpage))
+ ]
+
+ title = self._search_regex(
+ r'<div id="speakerName">\s*<span>([^<]+)</span>',
+ webpage, 'speaker', default=None)
+ if title:
+ field = self._search_regex(
+ r'<span id="primaryField">([^<]+)</span>',
+ webpage, 'field', default=None)
+ if field:
+ title += ' (%s)' % field
+
+ if not title:
+ title = self._search_regex(
+ r'<title>Play\s+all\s+stories\s*-\s*([^<]+)\s*-\s*Web\s+of\s+Stories</title>',
+ webpage, 'title')
+
+ return self.playlist_result(entries, playlist_id, title)
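
A worked instance of the URL building above for a mini-sites speaker, with values borrowed from the first test (the pairing of speaker and filename is illustrative, per the player.js logic the comment points to):

    VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
    speaker_type, speaker_id, story_filename = 'MS', 'hans.bethe', '4536'

    ms_prefix = 'mini_sites/' if speaker_type.lower() == 'ms' else ''
    mp4_url = '{0}{1}{2}/{3}.mp4'.format(
        VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename)
    play_path = 'mp4:{0}{1}/{2}.mp4'.format(ms_prefix, speaker_id, story_filename)
    print(mp4_url)    # http://eu-mobile.webofstories.com/mini_sites/hans.bethe/4536.mp4
    print(play_path)  # mp4:mini_sites/hans.bethe/4536.mp4
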
diff --git a/hypervideo_dl/extractor/weibo.py b/hypervideo_dl/extractor/weibo.py
new file mode 100644
index 0000000..621df5b
--- /dev/null
+++ b/hypervideo_dl/extractor/weibo.py
@@ -0,0 +1,140 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import random
+import re
+
+from .common import InfoExtractor
+
+from ..compat import (
+ compat_parse_qs,
+ compat_str,
+)
+from ..utils import (
+ js_to_json,
+ strip_jsonp,
+ urlencode_postdata,
+)
+
+
+class WeiboIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)'
+ _TEST = {
+ 'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment',
+ 'info_dict': {
+ 'id': 'Fp6RGfbff',
+ 'ext': 'mp4',
+ 'title': 'You should have servants to massage you,... 来自Hosico_猫 - 微博',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ # to get Referer url for genvisitor
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ visitor_url = urlh.geturl()
+
+ if 'passport.weibo.com' in visitor_url:
+ # first visit
+ visitor_data = self._download_json(
+ 'https://passport.weibo.com/visitor/genvisitor', video_id,
+ note='Generating first-visit data',
+ transform_source=strip_jsonp,
+ headers={'Referer': visitor_url},
+ data=urlencode_postdata({
+ 'cb': 'gen_callback',
+ 'fp': json.dumps({
+ 'os': '2',
+ 'browser': 'Gecko57,0,0,0',
+ 'fonts': 'undefined',
+ 'screenInfo': '1440*900*24',
+ 'plugins': '',
+ }),
+ }))
+
+ tid = visitor_data['data']['tid']
+ cnfd = '%03d' % visitor_data['data']['confidence']
+
+ self._download_webpage(
+ 'https://passport.weibo.com/visitor/visitor', video_id,
+ note='Running first-visit callback',
+ query={
+ 'a': 'incarnate',
+ 't': tid,
+ 'w': 2,
+ 'c': cnfd,
+ 'cb': 'cross_domain',
+ 'from': 'weibo',
+ '_rand': random.random(),
+ })
+
+ webpage = self._download_webpage(
+ url, video_id, note='Revisiting webpage')
+
+ title = self._html_search_regex(
+ r'<title>(.+?)</title>', webpage, 'title')
+
+ video_formats = compat_parse_qs(self._search_regex(
+ r'video-sources=\\\"(.+?)\"', webpage, 'video_sources'))
+
+ formats = []
+ supported_resolutions = (480, 720)
+ for res in supported_resolutions:
+ vid_urls = video_formats.get(compat_str(res))
+ if not vid_urls or not isinstance(vid_urls, list):
+ continue
+
+ vid_url = vid_urls[0]
+ formats.append({
+ 'url': vid_url,
+ 'height': res,
+ })
+
+ self._sort_formats(formats)
+
+ uploader = self._og_search_property(
+ 'nick-name', webpage, 'uploader', default=None)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'uploader': uploader,
+ 'formats': formats
+ }
+
+
+class WeiboMobileIE(InfoExtractor):
+ _VALID_URL = r'https?://m\.weibo\.cn/status/(?P<id>[0-9]+)(?:\?.+)?'
+ _TEST = {
+ 'url': 'https://m.weibo.cn/status/4189191225395228?wm=3333_2001&sourcetype=weixin&featurecode=newtitle&from=singlemessage&isappinstalled=0',
+ 'info_dict': {
+ 'id': '4189191225395228',
+ 'ext': 'mp4',
+ 'title': '午睡当然是要甜甜蜜蜜的啦',
+ 'uploader': '柴犬柴犬'
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ # all metadata is embedded in the $render_data object on the page
+ webpage = self._download_webpage(url, video_id, note='Visiting the page')
+
+ weibo_info = self._parse_json(self._search_regex(
+ r'var\s+\$render_data\s*=\s*\[({.*})\]\[0\]\s*\|\|\s*{};',
+ webpage, 'js_code', flags=re.DOTALL),
+ video_id, transform_source=js_to_json)
+
+ status_data = weibo_info.get('status', {})
+ page_info = status_data.get('page_info')
+ title = status_data['status_title']
+ uploader = status_data.get('user', {}).get('screen_name')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'uploader': uploader,
+ 'url': page_info['media_info']['stream_url']
+ }
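
The video-sources attribute WeiboIE extracts is itself a URL-encoded query string keyed by resolution. A sketch with invented URLs; compat_parse_qs behaves like Python 3's urllib.parse.parse_qs used below:

    from urllib.parse import parse_qs

    video_sources = ('480=http%3A%2F%2Fexample.com%2F480.mp4'
                     '&720=http%3A%2F%2Fexample.com%2F720.mp4')
    video_formats = parse_qs(video_sources)

    formats = [{'url': video_formats[str(res)][0], 'height': res}
               for res in (480, 720) if str(res) in video_formats]
    print(formats)
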
diff --git a/hypervideo_dl/extractor/weiqitv.py b/hypervideo_dl/extractor/weiqitv.py
new file mode 100644
index 0000000..7e0befd
--- /dev/null
+++ b/hypervideo_dl/extractor/weiqitv.py
@@ -0,0 +1,52 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class WeiqiTVIE(InfoExtractor):
+ IE_DESC = 'WQTV'
+ _VALID_URL = r'https?://(?:www\.)?weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3',
+ 'md5': '26450599afd64c513bc77030ad15db44',
+ 'info_dict': {
+ 'id': '53c744f09874f0e76a8b46f3',
+ 'ext': 'mp4',
+ 'title': '2013年度盘点',
+ },
+ }, {
+ 'url': 'http://www.weiqitv.com/index/video_play?videoId=567379a2d4c36cca518b4569',
+ 'info_dict': {
+ 'id': '567379a2d4c36cca518b4569',
+ 'ext': 'mp4',
+ 'title': '民国围棋史',
+ },
+ }, {
+ 'url': 'http://www.weiqitv.com/index/video_play?videoId=5430220a9874f088658b4567',
+ 'info_dict': {
+ 'id': '5430220a9874f088658b4567',
+ 'ext': 'mp4',
+ 'title': '二路托过的手段和运用',
+ },
+ }]
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ page = self._download_webpage(url, media_id)
+
+ info_json_str = self._search_regex(
+ r'var\s+video\s*=\s*(.+});', page, 'info json str')
+ info_json = self._parse_json(info_json_str, media_id)
+
+ letvcloud_url = self._search_regex(
+ r'var\s+letvurl\s*=\s*"([^"]+)', page, 'letvcloud url')
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'LetvCloud',
+ 'url': letvcloud_url,
+ 'title': info_json['name'],
+ 'id': media_id,
+ }
diff --git a/hypervideo_dl/extractor/wistia.py b/hypervideo_dl/extractor/wistia.py
new file mode 100644
index 0000000..ae32a0a
--- /dev/null
+++ b/hypervideo_dl/extractor/wistia.py
@@ -0,0 +1,199 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ try_get,
+ unescapeHTML,
+)
+
+
+class WistiaBaseIE(InfoExtractor):
+ _VALID_ID_REGEX = r'(?P<id>[a-z0-9]{10})'
+ _VALID_URL_BASE = r'https?://(?:fast\.)?wistia\.(?:net|com)/embed/'
+ _EMBED_BASE_URL = 'http://fast.wistia.com/embed/'
+
+ def _download_embed_config(self, config_type, config_id, referer):
+ base_url = self._EMBED_BASE_URL + '%ss/%s' % (config_type, config_id)
+ embed_config = self._download_json(
+ base_url + '.json', config_id, headers={
+ 'Referer': referer if referer.startswith('http') else base_url, # Some videos require this.
+ })
+
+ if isinstance(embed_config, dict) and embed_config.get('error'):
+ raise ExtractorError(
+ 'Error while getting the playlist', expected=True)
+
+ return embed_config
+
+ def _extract_media(self, embed_config):
+ data = embed_config['media']
+ video_id = data['hashedId']
+ title = data['name']
+
+ formats = []
+ thumbnails = []
+ for a in data['assets']:
+ aurl = a.get('url')
+ if not aurl:
+ continue
+ astatus = a.get('status')
+ atype = a.get('type')
+ if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'):
+ continue
+ elif atype in ('still', 'still_image'):
+ thumbnails.append({
+ 'url': aurl,
+ 'width': int_or_none(a.get('width')),
+ 'height': int_or_none(a.get('height')),
+ 'filesize': int_or_none(a.get('size')),
+ })
+ else:
+ aext = a.get('ext')
+ display_name = a.get('display_name')
+ format_id = atype
+ if atype and atype.endswith('_video') and display_name:
+ format_id = '%s-%s' % (atype[:-6], display_name)
+ f = {
+ 'format_id': format_id,
+ 'url': aurl,
+ 'tbr': int_or_none(a.get('bitrate')) or None,
+ 'preference': 1 if atype == 'original' else None,
+ }
+ if display_name == 'Audio':
+ f.update({
+ 'vcodec': 'none',
+ })
+ else:
+ f.update({
+ 'width': int_or_none(a.get('width')),
+ 'height': int_or_none(a.get('height')),
+ 'vcodec': a.get('codec'),
+ })
+ if a.get('container') == 'm3u8' or aext == 'm3u8':
+ ts_f = f.copy()
+ ts_f.update({
+ 'ext': 'ts',
+ 'format_id': f['format_id'].replace('hls-', 'ts-'),
+ 'url': f['url'].replace('.bin', '.ts'),
+ })
+ formats.append(ts_f)
+ f.update({
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ })
+ else:
+ f.update({
+ 'container': a.get('container'),
+ 'ext': aext,
+ 'filesize': int_or_none(a.get('size')),
+ })
+ formats.append(f)
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for caption in data.get('captions', []):
+ language = caption.get('language')
+ if not language:
+ continue
+ subtitles[language] = [{
+ 'url': self._EMBED_BASE_URL + 'captions/' + video_id + '.vtt?language=' + language,
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': data.get('seoDescription'),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(data.get('duration')),
+ 'timestamp': int_or_none(data.get('createdAt')),
+ 'subtitles': subtitles,
+ }
+
+
+class WistiaIE(WistiaBaseIE):
+ _VALID_URL = r'(?:wistia:|%s(?:iframe|medias)/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)
+
+ _TESTS = [{
+ # with hls video
+ 'url': 'wistia:807fafadvk',
+ 'md5': 'daff0f3687a41d9a71b40e0e8c2610fe',
+ 'info_dict': {
+ 'id': '807fafadvk',
+ 'ext': 'mp4',
+ 'title': 'Drip Brennan Dunn Workshop',
+ 'description': 'a JV Webinars video',
+ 'upload_date': '20160518',
+ 'timestamp': 1463607249,
+ 'duration': 4987.11,
+ },
+ }, {
+ 'url': 'wistia:sh7fpupwlt',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
+ 'only_matching': True,
+ }]
+
+ # https://wistia.com/support/embed-and-share/video-on-your-website
+ @staticmethod
+ def _extract_url(webpage):
+ urls = WistiaIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
+ def _extract_urls(webpage):
+ urls = []
+ for match in re.finditer(
+ r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
+ urls.append(unescapeHTML(match.group('url')))
+ for match in re.finditer(
+ r'''(?sx)
+ <div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
+ ''', webpage):
+ urls.append('wistia:%s' % match.group('id'))
+ for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
+ urls.append('wistia:%s' % match.group('id'))
+ return urls
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ embed_config = self._download_embed_config('media', video_id, url)
+ return self._extract_media(embed_config)
+
+
+class WistiaPlaylistIE(WistiaBaseIE):
+ _VALID_URL = r'%splaylists/%s' % (WistiaIE._VALID_URL_BASE, WistiaIE._VALID_ID_REGEX)
+
+ _TEST = {
+ 'url': 'https://fast.wistia.net/embed/playlists/aodt9etokc',
+ 'info_dict': {
+ 'id': 'aodt9etokc',
+ },
+ 'playlist_count': 3,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ playlist = self._download_embed_config('playlist', playlist_id, url)
+
+ entries = []
+ for media in (try_get(playlist, lambda x: x[0]['medias']) or []):
+ embed_config = media.get('embed_config')
+ if not embed_config:
+ continue
+ entries.append(self._extract_media(embed_config))
+
+ return self.playlist_result(entries, playlist_id)
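
_extract_urls recognises three embed styles: iframe/script/meta references to the embed URL, wistia_async_* class names, and data-wistia-id / Wistia.embed initialisers. A condensed sketch of the first two passes on invented markup (the class-name pass is reduced to a plain pattern here):

    import re

    webpage = '''
    <iframe src="//fast.wistia.net/embed/iframe/807fafadvk"></iframe>
    <div class="wistia_embed wistia_async_sh7fpupwlt"></div>
    '''
    urls = re.findall(
        r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\']'
        r'((?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})',
        webpage)
    urls += ['wistia:%s' % m
             for m in re.findall(r'\bwistia_async_([a-z0-9]{10})\b', webpage)]
    print(urls)  # ['//fast.wistia.net/embed/iframe/807fafadvk', 'wistia:sh7fpupwlt']
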
diff --git a/hypervideo_dl/extractor/worldstarhiphop.py b/hypervideo_dl/extractor/worldstarhiphop.py
new file mode 100644
index 0000000..82587b4
--- /dev/null
+++ b/hypervideo_dl/extractor/worldstarhiphop.py
@@ -0,0 +1,40 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class WorldStarHipHopIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?.*?\bv=(?P<id>[^&]+)'
+ _TESTS = [{
+ 'url': 'http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO',
+ 'md5': '9d04de741161603bf7071bbf4e883186',
+ 'info_dict': {
+ 'id': 'wshh6a7q1ny0G34ZwuIO',
+ 'ext': 'mp4',
+ 'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!'
+ }
+ }, {
+ 'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ entries = self._parse_html5_media_entries(url, webpage, video_id)
+
+ if not entries:
+ return self.url_result(url, 'Generic')
+
+ title = self._html_search_regex(
+ [r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
+ r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
+ webpage, 'title')
+
+ info = entries[0]
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ })
+ return info
diff --git a/hypervideo_dl/extractor/wsj.py b/hypervideo_dl/extractor/wsj.py
new file mode 100644
index 0000000..67236f3
--- /dev/null
+++ b/hypervideo_dl/extractor/wsj.py
@@ -0,0 +1,123 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ float_or_none,
+ unified_strdate,
+)
+
+
+class WSJIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
+ https?://(?:www\.)?(?:wsj|barrons)\.com/video/(?:[^/]+/)+|
+ wsj:
+ )
+ (?P<id>[a-fA-F0-9-]{36})
+ '''
+ IE_DESC = 'Wall Street Journal'
+ _TESTS = [{
+ 'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
+ 'md5': 'e230a5bb249075e40793b655a54a02e4',
+ 'info_dict': {
+ 'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
+ 'ext': 'mp4',
+ 'upload_date': '20150202',
+ 'uploader_id': 'jdesai',
+ 'creator': 'jdesai',
+ 'categories': list, # a long list
+ 'duration': 90,
+ 'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
+ },
+ }, {
+ 'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.wsj.com/video/series/a-brief-history-of/the-modern-cell-carrier-how-we-got-here/980E2187-401D-48A1-B82B-1486CEE06CB9',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = self._download_json(
+ 'http://video-api.wsj.com/api-video/find_all_videos.asp', video_id,
+ query={
+ 'type': 'guid',
+ 'count': 1,
+ 'query': video_id,
+ 'fields': ','.join((
+ 'type', 'hls', 'videoMP4List', 'thumbnailList', 'author',
+ 'description', 'name', 'duration', 'videoURL', 'titletag',
+ 'formattedCreationDate', 'keywords', 'editor')),
+ })['items'][0]
+ title = info.get('name', info.get('titletag'))
+
+ formats = []
+
+ f4m_url = info.get('videoURL')
+ if f4m_url:
+ formats.extend(self._extract_f4m_formats(
+ f4m_url, video_id, f4m_id='hds', fatal=False))
+
+ m3u8_url = info.get('hls')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+
+ for v in info.get('videoMP4List', []):
+ mp4_url = v.get('url')
+ if not mp4_url:
+ continue
+ tbr = int_or_none(v.get('bitrate'))
+ formats.append({
+ 'url': mp4_url,
+ 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
+ 'tbr': tbr,
+ 'width': int_or_none(v.get('width')),
+ 'height': int_or_none(v.get('height')),
+ 'fps': float_or_none(v.get('fps')),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ # Thumbnails are conveniently in the correct format already
+ 'thumbnails': info.get('thumbnailList'),
+ 'creator': info.get('author'),
+ 'uploader_id': info.get('editor'),
+ 'duration': int_or_none(info.get('duration')),
+ 'upload_date': unified_strdate(info.get(
+ 'formattedCreationDate'), day_first=False),
+ 'title': title,
+ 'categories': info.get('keywords'),
+ }
+
+
+class WSJArticleIE(InfoExtractor):
+ _VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.wsj.com/articles/dont-like-china-no-pandas-for-you-1490366939?',
+ 'info_dict': {
+ 'id': '4B13FA62-1D8C-45DB-8EA1-4105CB20B362',
+ 'ext': 'mp4',
+ 'upload_date': '20170221',
+ 'uploader_id': 'ralcaraz',
+ 'title': 'Bao Bao the Panda Leaves for China',
+ }
+ }
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+ video_id = self._search_regex(
+ r'data-src=["\']([a-fA-F0-9-]{36})', webpage, 'video id')
+ return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id)
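
For reference, this is how the find_all_videos.asp lookup above is parameterised; the sketch only reproduces the query construction and makes no request:

    from urllib.parse import urlencode

    guid = '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A'  # from the first test
    query = {
        'type': 'guid',
        'count': 1,
        'query': guid,
        'fields': ','.join((
            'type', 'hls', 'videoMP4List', 'thumbnailList', 'author',
            'description', 'name', 'duration', 'videoURL', 'titletag',
            'formattedCreationDate', 'keywords', 'editor')),
    }
    print('http://video-api.wsj.com/api-video/find_all_videos.asp?' + urlencode(query))
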
diff --git a/hypervideo_dl/extractor/wwe.py b/hypervideo_dl/extractor/wwe.py
new file mode 100644
index 0000000..bebc77b
--- /dev/null
+++ b/hypervideo_dl/extractor/wwe.py
@@ -0,0 +1,140 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ try_get,
+ unescapeHTML,
+ url_or_none,
+ urljoin,
+)
+
+
+class WWEBaseIE(InfoExtractor):
+ _SUBTITLE_LANGS = {
+ 'English': 'en',
+ 'Deutsch': 'de',
+ }
+
+ def _extract_entry(self, data, url, video_id=None):
+ video_id = compat_str(video_id or data['nid'])
+ title = data['title']
+
+ formats = self._extract_m3u8_formats(
+ data['file'], video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+
+ description = data.get('description')
+ thumbnail = urljoin(url, data.get('image'))
+ series = data.get('show_name')
+ episode = data.get('episode_name')
+
+ subtitles = {}
+ tracks = data.get('tracks')
+ if isinstance(tracks, list):
+ for track in tracks:
+ if not isinstance(track, dict):
+ continue
+ if track.get('kind') != 'captions':
+ continue
+ track_file = url_or_none(track.get('file'))
+ if not track_file:
+ continue
+ label = track.get('label')
+ lang = self._SUBTITLE_LANGS.get(label, label) or 'en'
+ subtitles.setdefault(lang, []).append({
+ 'url': track_file,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'series': series,
+ 'episode': episode,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class WWEIE(WWEBaseIE):
+ _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*videos/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018',
+ 'md5': '92811c6a14bfc206f7a6a9c5d9140184',
+ 'info_dict': {
+ 'id': '40048199',
+ 'ext': 'mp4',
+ 'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 4, 2018',
+ 'description': 'md5:2d7424dbc6755c61a0e649d2a8677f67',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ landing = self._parse_json(
+ self._html_search_regex(
+ r'(?s)Drupal\.settings\s*,\s*({.+?})\s*\)\s*;',
+ webpage, 'drupal settings'),
+ display_id)['WWEVideoLanding']
+
+ data = landing['initialVideo']['playlist'][0]
+ video_id = landing.get('initialVideoId')
+
+ info = self._extract_entry(data, url, video_id)
+ info['display_id'] = display_id
+ return info
+
+
+class WWEPlaylistIE(WWEBaseIE):
+ _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.wwe.com/shows/raw/2018-11-12',
+ 'info_dict': {
+ 'id': '2018-11-12',
+ },
+ 'playlist_mincount': 11,
+ }, {
+ 'url': 'http://www.wwe.com/article/walk-the-prank-wwe-edition',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.wwe.com/shows/wwenxt/article/matt-riddle-interview',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if WWEIE.suitable(url) else super(WWEPlaylistIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
+ for mobj in re.finditer(
+ r'data-video\s*=\s*(["\'])(?P<data>{.+?})\1', webpage):
+ video = self._parse_json(
+ mobj.group('data'), display_id, transform_source=unescapeHTML,
+ fatal=False)
+ if not video:
+ continue
+ data = try_get(video, lambda x: x['playlist'][0], dict)
+ if not data:
+ continue
+ try:
+ entry = self._extract_entry(data, url)
+ except Exception:
+ continue
+ entry['extractor_key'] = WWEIE.ie_key()
+ entries.append(entry)
+
+ return self.playlist_result(entries, display_id)
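
WWEIE's playlist lives in an inline Drupal.settings extension call. A sketch of the same regex-plus-JSON step on an invented miniature page:

    import json
    import re

    webpage = ('<script>jQuery.extend(Drupal.settings, {"WWEVideoLanding":'
               ' {"initialVideoId": 40048199, "initialVideo": {"playlist":'
               ' [{"nid": 40048199, "title": "Example match",'
               ' "file": "http://example.com/master.m3u8"}]}}});</script>')
    settings = json.loads(re.search(
        r'(?s)Drupal\.settings\s*,\s*({.+?})\s*\)\s*;', webpage).group(1))
    data = settings['WWEVideoLanding']['initialVideo']['playlist'][0]
    print(data['nid'], data['file'])  # 40048199 http://example.com/master.m3u8
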
diff --git a/hypervideo_dl/extractor/xbef.py b/hypervideo_dl/extractor/xbef.py
new file mode 100644
index 0000000..4c41e98
--- /dev/null
+++ b/hypervideo_dl/extractor/xbef.py
@@ -0,0 +1,44 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class XBefIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://xbef.com/video/5119-glamourous-lesbians-smoking-drinking-and-fucking',
+ 'md5': 'a478b565baff61634a98f5e5338be995',
+ 'info_dict': {
+ 'id': '5119',
+ 'ext': 'mp4',
+ 'title': 'md5:7358a9faef8b7b57acda7c04816f170e',
+ 'age_limit': 18,
+ 'thumbnail': r're:^http://.*\.jpg',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h1[^>]*>(.*?)</h1>', webpage, 'title')
+
+ config_url_enc = self._download_webpage(
+ 'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id,
+ note='Retrieving config URL')
+ config_url = compat_urllib_parse_unquote(config_url_enc)
+ config = self._download_xml(
+ config_url, video_id, note='Retrieving config')
+
+ video_url = config.find('./file').text
+ thumbnail = config.find('./image').text
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'age_limit': 18,
+ }
diff --git a/hypervideo_dl/extractor/xboxclips.py b/hypervideo_dl/extractor/xboxclips.py
new file mode 100644
index 0000000..25f487e
--- /dev/null
+++ b/hypervideo_dl/extractor/xboxclips.py
@@ -0,0 +1,68 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ int_or_none,
+ month_by_abbreviation,
+ parse_filesize,
+)
+
+
+class XboxClipsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:xboxclips\.com|gameclips\.io)/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'http://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
+ 'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
+ 'info_dict': {
+ 'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
+ 'ext': 'mp4',
+ 'title': 'iAbdulElah playing Titanfall',
+ 'filesize_approx': 26800000,
+ 'upload_date': '20140807',
+ 'duration': 56,
+ }
+ }, {
+ 'url': 'https://gameclips.io/iAbdulElah/074a69a9-5faf-46aa-b93b-9909c1720325',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ if '/video.php' in url:
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ url = 'https://gameclips.io/%s/%s' % (qs['gamertag'][0], qs['vid'][0])
+
+ webpage = self._download_webpage(url, video_id)
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+
+ title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
+ upload_date = None
+ mobj = re.search(
+ r'>Recorded: (\d{2})-(Jan|Feb|Mar|Apr|May|Ju[nl]|Aug|Sep|Oct|Nov|Dec)-(\d{4})',
+ webpage)
+ if mobj:
+ upload_date = '%s%.2d%s' % (mobj.group(3), month_by_abbreviation(mobj.group(2)), mobj.group(1))
+ filesize = parse_filesize(self._html_search_regex(
+ r'>Size: ([^<]+)<', webpage, 'file size', fatal=False))
+ duration = int_or_none(self._html_search_regex(
+ r'>Duration: (\d+) Seconds<', webpage, 'duration', fatal=False))
+ view_count = int_or_none(self._html_search_regex(
+ r'>Views: (\d+)<', webpage, 'view count', fatal=False))
+
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'upload_date': upload_date,
+ 'filesize_approx': filesize,
+ 'duration': duration,
+ 'view_count': view_count,
+ })
+ return info
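
How the 'Recorded:' line above becomes a YYYYMMDD upload_date; the month list here stands in for hypervideo's month_by_abbreviation():

    import re

    MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    snippet = '>Recorded: 07-Aug-2014'
    mobj = re.search(
        r'>Recorded: (\d{2})-(Jan|Feb|Mar|Apr|May|Ju[nl]|Aug|Sep|Oct|Nov|Dec)-(\d{4})',
        snippet)
    upload_date = '%s%.2d%s' % (
        mobj.group(3), MONTHS.index(mobj.group(2)) + 1, mobj.group(1))
    print(upload_date)  # -> '20140807', matching the test above
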
diff --git a/hypervideo_dl/extractor/xfileshare.py b/hypervideo_dl/extractor/xfileshare.py
new file mode 100644
index 0000000..df9efa9
--- /dev/null
+++ b/hypervideo_dl/extractor/xfileshare.py
@@ -0,0 +1,201 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_chr
+from ..utils import (
+ decode_packed_codes,
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ urlencode_postdata,
+)
+
+
+# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
+def aa_decode(aa_code):
+ symbol_table = [
+ ('7', '((゚ー゚) + (o^_^o))'),
+ ('6', '((o^_^o) +(o^_^o))'),
+ ('5', '((゚ー゚) + (゚Θ゚))'),
+ ('2', '((o^_^o) - (゚Θ゚))'),
+ ('4', '(゚ー゚)'),
+ ('3', '(o^_^o)'),
+ ('1', '(゚Θ゚)'),
+ ('0', '(c^_^o)'),
+ ]
+ delim = '(゚Д゚)[゚ε゚]+'
+ ret = ''
+ for aa_char in aa_code.split(delim):
+ for val, pat in symbol_table:
+ aa_char = aa_char.replace(pat, val)
+ aa_char = aa_char.replace('+ ', '')
+ m = re.match(r'^\d+', aa_char)
+ if m:
+ ret += compat_chr(int(m.group(0), 8))
+ else:
+ m = re.match(r'^u([\da-f]+)', aa_char)
+ if m:
+ ret += compat_chr(int(m.group(1), 16))
+ return ret
+
+
+class XFileShareIE(InfoExtractor):
+ _SITES = (
+ (r'aparat\.cam', 'Aparat'),
+ (r'clipwatching\.com', 'ClipWatching'),
+ (r'gounlimited\.to', 'GoUnlimited'),
+ (r'govid\.me', 'GoVid'),
+ (r'holavid\.com', 'HolaVid'),
+ (r'streamty\.com', 'Streamty'),
+ (r'thevideobee\.to', 'TheVideoBee'),
+ (r'uqload\.com', 'Uqload'),
+ (r'vidbom\.com', 'VidBom'),
+ (r'vidlo\.us', 'vidlo'),
+ (r'vidlocker\.xyz', 'VidLocker'),
+ (r'vidshare\.tv', 'VidShare'),
+ (r'vup\.to', 'VUp'),
+ (r'wolfstream\.tv', 'WolfStream'),
+ (r'xvideosharing\.com', 'XVideoSharing'),
+ )
+
+ IE_DESC = 'XFileShare-based sites: %s' % ', '.join(list(zip(*_SITES))[1])
+ _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
+ % '|'.join(site for site in list(zip(*_SITES))[0]))
+
+ _FILE_NOT_FOUND_REGEXES = (
+ r'>(?:404 - )?File Not Found<',
+ r'>The file was removed by administrator<',
+ )
+
+ _TESTS = [{
+ 'url': 'http://xvideosharing.com/fq65f94nd2ve',
+ 'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
+ 'info_dict': {
+ 'id': 'fq65f94nd2ve',
+ 'ext': 'mp4',
+ 'title': 'sample',
+ 'thumbnail': r're:http://.*\.jpg',
+ },
+ }, {
+ 'url': 'https://aparat.cam/n4d6dh0wvlpr',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://wolfstream.tv/nthme29v9u2x',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
+ % '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]),
+ webpage)]
+
+ def _real_extract(self, url):
+ host, video_id = re.match(self._VALID_URL, url).groups()
+
+ url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
+ webpage = self._download_webpage(url, video_id)
+
+ if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ fields = self._hidden_inputs(webpage)
+
+ if fields.get('op') == 'download1':
+ countdown = int_or_none(self._search_regex(
+ r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
+ webpage, 'countdown', default=None))
+ if countdown:
+ self._sleep(countdown, video_id)
+
+ webpage = self._download_webpage(
+ url, video_id, 'Downloading video page',
+ data=urlencode_postdata(fields), headers={
+ 'Referer': url,
+ 'Content-type': 'application/x-www-form-urlencoded',
+ })
+
+ title = (self._search_regex(
+ (r'style="z-index: [0-9]+;">([^<]+)</span>',
+ r'<td nowrap>([^<]+)</td>',
+ r'h4-fine[^>]*>([^<]+)<',
+ r'>Watch (.+)[ <]',
+ r'<h2 class="video-page-head">([^<]+)</h2>',
+ r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to
+ r'title\s*:\s*"([^"]+)"'), # govid.me
+ webpage, 'title', default=None) or self._og_search_title(
+ webpage, default=None) or video_id).strip()
+
+ for regex, func in (
+ (r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
+ (r'(゚.+)', aa_decode)):
+ obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
+ if obf_code:
+ webpage = webpage.replace(obf_code, func(obf_code))
+
+ formats = []
+
+ jwplayer_data = self._search_regex(
+ [
+ r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
+ r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
+ ], webpage,
+ 'jwplayer data', default=None)
+ if jwplayer_data:
+ jwplayer_data = self._parse_json(
+ jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
+ if jwplayer_data:
+ formats = self._parse_jwplayer_data(
+ jwplayer_data, video_id, False,
+ m3u8_id='hls', mpd_id='dash')['formats']
+
+ if not formats:
+ urls = []
+ for regex in (
+ r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
+ r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
+ r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
+ r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
+ for mobj in re.finditer(regex, webpage):
+ video_url = mobj.group('url')
+ if video_url not in urls:
+ urls.append(video_url)
+
+ sources = self._search_regex(
+ r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
+ if sources:
+ urls.extend(self._parse_json(sources, video_id))
+
+ formats = []
+ for video_url in urls:
+ if determine_ext(video_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'sd',
+ })
+ self._sort_formats(formats)
+
+ thumbnail = self._search_regex(
+ [
+ r'<video[^>]+poster="([^"]+)"',
+ r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
+ ], webpage, 'thumbnail', default=None)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/xhamster.py b/hypervideo_dl/extractor/xhamster.py
new file mode 100644
index 0000000..f73b977
--- /dev/null
+++ b/hypervideo_dl/extractor/xhamster.py
@@ -0,0 +1,450 @@
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ determine_ext,
+ dict_get,
+ extract_attributes,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_duration,
+ str_or_none,
+ try_get,
+ unified_strdate,
+ url_or_none,
+ urljoin,
+)
+
+
+class XHamsterIE(InfoExtractor):
+ _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:.+?\.)?%s/
+ (?:
+ movies/(?P<id>[\dA-Za-z]+)/(?P<display_id>[^/]*)\.html|
+ videos/(?P<display_id_2>[^/]*)-(?P<id_2>[\dA-Za-z]+)
+ )
+ ''' % _DOMAINS
+ _TESTS = [{
+ 'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'md5': '98b4687efb1ffd331c4197854dc09e8f',
+ 'info_dict': {
+ 'id': '1509445',
+ 'display_id': 'femaleagent-shy-beauty-takes-the-bait',
+ 'ext': 'mp4',
+ 'title': 'FemaleAgent Shy beauty takes the bait',
+ 'timestamp': 1350194821,
+ 'upload_date': '20121014',
+ 'uploader': 'Ruseful2011',
+ 'duration': 893,
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'https://xhamster.com/videos/britney-spears-sexy-booty-2221348?hd=',
+ 'info_dict': {
+ 'id': '2221348',
+ 'display_id': 'britney-spears-sexy-booty',
+ 'ext': 'mp4',
+ 'title': 'Britney Spears Sexy Booty',
+ 'timestamp': 1379123460,
+ 'upload_date': '20130914',
+ 'uploader': 'jojo747400',
+ 'duration': 200,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # empty seo, unavailable via new URL schema
+ 'url': 'http://xhamster.com/movies/5667973/.html',
+ 'info_dict': {
+ 'id': '5667973',
+ 'ext': 'mp4',
+ 'title': '....',
+ 'timestamp': 1454948101,
+ 'upload_date': '20160208',
+ 'uploader': 'parejafree',
+ 'duration': 72,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # mobile site
+ 'url': 'https://m.xhamster.com/videos/cute-teen-jacqueline-solo-masturbation-8559111',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
+ 'only_matching': True,
+ }, {
+ # This video is visible for marcoalfa123456's friends only
+ 'url': 'https://it.xhamster.com/movies/7263980/la_mia_vicina.html',
+ 'only_matching': True,
+ }, {
+ # new URL schema
+ 'url': 'https://pt.xhamster.com/videos/euro-pedal-pumping-7937821',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster.desi/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster2.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster11.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster26.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('id_2')
+ display_id = mobj.group('display_id') or mobj.group('display_id_2')
+
+ desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)
+ webpage, urlh = self._download_webpage_handle(desktop_url, video_id)
+
+ error = self._html_search_regex(
+ r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
+ webpage, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
+ age_limit = self._rta_search(webpage)
+
+ def get_height(s):
+ return int_or_none(self._search_regex(
+ r'^(\d+)[pP]', s, 'height', default=None))
+
+ initials = self._parse_json(
+ self._search_regex(
+ (r'window\.initials\s*=\s*({.+?})\s*;\s*</script>',
+ r'window\.initials\s*=\s*({.+?})\s*;'), webpage, 'initials',
+ default='{}'),
+ video_id, fatal=False)
+ if initials:
+ video = initials['videoModel']
+ title = video['title']
+ formats = []
+ format_urls = set()
+ format_sizes = {}
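+ # the same URL can appear under several source dicts (standard, hls,
+ # download); track emitted URLs so formats are not duplicated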
+ sources = try_get(video, lambda x: x['sources'], dict) or {}
+ for format_id, formats_dict in sources.items():
+ if not isinstance(formats_dict, dict):
+ continue
+ download_sources = try_get(sources, lambda x: x['download'], dict) or {}
+ for quality, format_dict in download_sources.items():
+ if not isinstance(format_dict, dict):
+ continue
+ format_sizes[quality] = float_or_none(format_dict.get('size'))
+ for quality, format_item in formats_dict.items():
+ if format_id == 'download':
+ # Download link takes some time to be generated,
+ # skipping for now
+ continue
+ format_url = format_item
+ format_url = url_or_none(format_url)
+ if not format_url or format_url in format_urls:
+ continue
+ format_urls.add(format_url)
+ formats.append({
+ 'format_id': '%s-%s' % (format_id, quality),
+ 'url': format_url,
+ 'ext': determine_ext(format_url, 'mp4'),
+ 'height': get_height(quality),
+ 'filesize': format_sizes.get(quality),
+ 'http_headers': {
+ 'Referer': urlh.geturl(),
+ },
+ })
+ xplayer_sources = try_get(
+ initials, lambda x: x['xplayerSettings']['sources'], dict)
+ if xplayer_sources:
+ hls_sources = xplayer_sources.get('hls')
+ if isinstance(hls_sources, dict):
+ for hls_format_key in ('url', 'fallback'):
+ hls_url = hls_sources.get(hls_format_key)
+ if not hls_url:
+ continue
+ hls_url = urljoin(url, hls_url)
+ if not hls_url or hls_url in format_urls:
+ continue
+ format_urls.add(hls_url)
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ standard_sources = xplayer_sources.get('standard')
+ if isinstance(standard_sources, dict):
+ for format_id, formats_list in standard_sources.items():
+ if not isinstance(formats_list, list):
+ continue
+ for standard_format in formats_list:
+ if not isinstance(standard_format, dict):
+ continue
+ for standard_format_key in ('url', 'fallback'):
+ standard_url = standard_format.get(standard_format_key)
+ if not standard_url:
+ continue
+ standard_url = urljoin(url, standard_url)
+ if not standard_url or standard_url in format_urls:
+ continue
+ format_urls.add(standard_url)
+ ext = determine_ext(standard_url, 'mp4')
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ standard_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+ quality = (str_or_none(standard_format.get('quality'))
+ or str_or_none(standard_format.get('label'))
+ or '')
+ formats.append({
+ 'format_id': '%s-%s' % (format_id, quality),
+ 'url': standard_url,
+ 'ext': ext,
+ 'height': get_height(quality),
+ 'filesize': format_sizes.get(quality),
+ 'http_headers': {
+ 'Referer': standard_url,
+ },
+ })
+ self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
+
+ categories_list = video.get('categories')
+ if isinstance(categories_list, list):
+ categories = []
+ for c in categories_list:
+ if not isinstance(c, dict):
+ continue
+ c_name = c.get('name')
+ if isinstance(c_name, compat_str):
+ categories.append(c_name)
+ else:
+ categories = None
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'timestamp': int_or_none(video.get('created')),
+ 'uploader': try_get(
+ video, lambda x: x['author']['name'], compat_str),
+ 'thumbnail': video.get('thumbURL'),
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('views')),
+ 'like_count': int_or_none(try_get(
+ video, lambda x: x['rating']['likes'], int)),
+ 'dislike_count': int_or_none(try_get(
+ video, lambda x: x['rating']['dislikes'], int)),
+ 'comment_count': int_or_none(video.get('comments')),  # assumed key
+ 'age_limit': age_limit,
+ 'categories': categories,
+ 'formats': formats,
+ }
+
+ # Old layout fallback
+
+ title = self._html_search_regex(
+ [r'<h1[^>]*>([^<]+)</h1>',
+ r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"',
+ r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)</title>'],
+ webpage, 'title')
+
+ formats = []
+ format_urls = set()
+
+ sources = self._parse_json(
+ self._search_regex(
+ r'sources\s*:\s*({.+?})\s*,?\s*\n', webpage, 'sources',
+ default='{}'),
+ video_id, fatal=False)
+ for format_id, format_url in sources.items():
+ format_url = url_or_none(format_url)
+ if not format_url:
+ continue
+ if format_url in format_urls:
+ continue
+ format_urls.add(format_url)
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'height': get_height(format_id),
+ })
+
+ video_url = self._search_regex(
+ [r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''',
+ r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''',
+ r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''],
+ webpage, 'video url', group='mp4', default=None)
+ if video_url and video_url not in format_urls:
+ formats.append({
+ 'url': video_url,
+ })
+
+ self._sort_formats(formats)
+
+ # Only a few videos have a description
+ mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
+ description = mobj.group(1) if mobj else None
+
+ upload_date = unified_strdate(self._search_regex(
+ r'hint=["\'](\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}',
+ webpage, 'upload date', fatal=False))
+
+ uploader = self._html_search_regex(
+ r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+><span[^>]+>([^<]+)',
+ webpage, 'uploader', default='anonymous')
+
+ thumbnail = self._search_regex(
+ [r'''["']thumbUrl["']\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''',
+ r'''<video[^>]+"poster"=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''],
+ webpage, 'thumbnail', fatal=False, group='thumbnail')
+
+ duration = parse_duration(self._search_regex(
+ [r'<[^<]+\bitemprop=["\']duration["\'][^<]+\bcontent=["\'](.+?)["\']',
+ r'Runtime:\s*</span>\s*([\d:]+)'], webpage,
+ 'duration', fatal=False))
+
+ view_count = int_or_none(self._search_regex(
+ r'content=["\']User(?:View|Play)s:(\d+)',
+ webpage, 'view count', fatal=False))
+
+ mobj = re.search(r'hint=[\'"](?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes', webpage)
+ (like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)
+
+ mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
+ comment_count = mobj.group('commentcount') if mobj else 0
+
+ categories_html = self._search_regex(
+ r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
+ 'categories', default=None)
+ categories = [clean_html(category) for category in re.findall(
+ r'<a[^>]+>(.+?)</a>', categories_html)] if categories_html else None
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': int_or_none(like_count),
+ 'dislike_count': int_or_none(dislike_count),
+ 'comment_count': int_or_none(comment_count),
+ 'age_limit': age_limit,
+ 'categories': categories,
+ 'formats': formats,
+ }
+
+
+class XHamsterEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
+ _TEST = {
+ 'url': 'http://xhamster.com/xembed.php?video=3328539',
+ 'info_dict': {
+ 'id': '3328539',
+ 'ext': 'mp4',
+ 'title': 'Pen Masturbation',
+ 'timestamp': 1406581861,
+ 'upload_date': '20140728',
+ 'uploader': 'ManyakisArt',
+ 'duration': 5,
+ 'age_limit': 18,
+ }
+ }
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [url for _, url in re.findall(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(
+ r'href="(https?://xhamster\.com/(?:movies/{0}/[^"]*\.html|videos/[^/]*-{0})[^"]*)"'.format(video_id),
+ webpage, 'xhamster url', default=None)
+
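+ # fall back to the embedded player vars when the page carries no
+ # direct link to the watch page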
+ if not video_url:
+ vars = self._parse_json(
+ self._search_regex(r'vars\s*:\s*({.+?})\s*,\s*\n', webpage, 'vars'),
+ video_id)
+ video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
+
+ return self.url_result(video_url, 'XHamster')
+
+
+class XHamsterUserIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)' % XHamsterIE._DOMAINS
+ _TESTS = [{
+ # Paginated user profile
+ 'url': 'https://xhamster.com/users/netvideogirls/videos',
+ 'info_dict': {
+ 'id': 'netvideogirls',
+ },
+ 'playlist_mincount': 267,
+ }, {
+ # Non-paginated user profile
+ 'url': 'https://xhamster.com/users/firatkaan/videos',
+ 'info_dict': {
+ 'id': 'firatkaan',
+ },
+ 'playlist_mincount': 1,
+ }]
+
+ def _entries(self, user_id):
+ next_page_url = 'https://xhamster.com/users/%s/videos/1' % user_id
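+ # follow the data-page="next" anchor until the profile runs out of pages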
+ for pagenum in itertools.count(1):
+ page = self._download_webpage(
+ next_page_url, user_id, 'Downloading page %s' % pagenum)
+ for video_tag in re.findall(
+ r'(<a[^>]+class=["\'].*?\bvideo-thumb__image-container[^>]+>)',
+ page):
+ video = extract_attributes(video_tag)
+ video_url = url_or_none(video.get('href'))
+ if not video_url or not XHamsterIE.suitable(video_url):
+ continue
+ video_id = XHamsterIE._match_id(video_url)
+ yield self.url_result(
+ video_url, ie=XHamsterIE.ie_key(), video_id=video_id)
+ mobj = re.search(r'<a[^>]+data-page=["\']next[^>]+>', page)
+ if not mobj:
+ break
+ next_page = extract_attributes(mobj.group(0))
+ next_page_url = url_or_none(next_page.get('href'))
+ if not next_page_url:
+ break
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ return self.playlist_result(self._entries(user_id), user_id)
diff --git a/hypervideo_dl/extractor/xiami.py b/hypervideo_dl/extractor/xiami.py
new file mode 100644
index 0000000..769aab3
--- /dev/null
+++ b/hypervideo_dl/extractor/xiami.py
@@ -0,0 +1,201 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import int_or_none
+
+
+class XiamiBaseIE(InfoExtractor):
+ _API_BASE_URL = 'https://emumo.xiami.com/song/playlist/cat/json/id'
+
+ def _download_webpage_handle(self, *args, **kwargs):
+ webpage, urlh = super(XiamiBaseIE, self)._download_webpage_handle(*args, **kwargs)
+ if '>Xiami is currently not available in your country.<' in webpage:
+ self.raise_geo_restricted('Xiami is currently not available in your country')
+ return webpage, urlh
+
+ def _extract_track(self, track, track_id=None):
+ track_name = track.get('songName') or track.get('name') or track['subName']
+ artist = track.get('artist') or track.get('artist_name') or track.get('singers')
+ title = '%s - %s' % (artist, track_name) if artist else track_name
+ track_url = self._decrypt(track['location'])
+
+ subtitles = {}
+ lyrics_url = track.get('lyric_url') or track.get('lyric')
+ if lyrics_url and lyrics_url.startswith('http'):
+ subtitles['origin'] = [{'url': lyrics_url}]
+
+ return {
+ 'id': track.get('song_id') or track_id,
+ 'url': track_url,
+ 'title': title,
+ 'thumbnail': track.get('pic') or track.get('album_pic'),
+ 'duration': int_or_none(track.get('length')),
+ 'creator': track.get('artist', '').split(';')[0],
+ 'track': track_name,
+ 'track_number': int_or_none(track.get('track')),
+ 'album': track.get('album_name') or track.get('title'),
+ 'artist': artist,
+ 'subtitles': subtitles,
+ }
+
+ def _extract_tracks(self, item_id, referer, typ=None):
+ playlist = self._download_json(
+ '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''),
+ item_id, headers={
+ 'Referer': referer,
+ })
+ return [
+ self._extract_track(track, item_id)
+ for track in playlist['data']['trackList']]
+
+ @staticmethod
+ def _decrypt(origin):
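+ # 'location' is a percent-encoded URL scrambled by a columnar
+ # transposition: the first char is the row count n, the remainder is
+ # the text split into n rows (the first len%n rows one char longer).
+ # Reading the rows column by column restores the plaintext, e.g.
+ # '3adgbehcf' -> rows 'adg', 'beh', 'cf' -> 'abcdefgh' (illustrative
+ # payload). Finally unquote and map '^' back to '0'.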
+ n = int(origin[0])
+ origin = origin[1:]
+ short_length = len(origin) // n
+ long_num = len(origin) - short_length * n
+ l = tuple()
+ for i in range(0, n):
+ length = short_length
+ if i < long_num:
+ length += 1
+ l += (origin[0:length], )
+ origin = origin[length:]
+ ans = ''
+ for i in range(0, short_length + 1):
+ for j in range(0, n):
+ if len(l[j]) > i:
+ ans += l[j][i]
+ return compat_urllib_parse_unquote(ans).replace('^', '0')
+
+
+class XiamiSongIE(XiamiBaseIE):
+ IE_NAME = 'xiami:song'
+ IE_DESC = '虾米音乐'
+ _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.xiami.com/song/1775610518',
+ 'md5': '521dd6bea40fd5c9c69f913c232cb57e',
+ 'info_dict': {
+ 'id': '1775610518',
+ 'ext': 'mp3',
+ 'title': 'HONNE - Woman',
+ 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
+ 'duration': 265,
+ 'creator': 'HONNE',
+ 'track': 'Woman',
+ 'album': 'Woman',
+ 'artist': 'HONNE',
+ 'subtitles': {
+ 'origin': [{
+ 'ext': 'lrc',
+ }],
+ },
+ },
+ 'skip': 'Georestricted',
+ }, {
+ 'url': 'http://www.xiami.com/song/1775256504',
+ 'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
+ 'info_dict': {
+ 'id': '1775256504',
+ 'ext': 'mp3',
+ 'title': '戴荃 - 悟空',
+ 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
+ 'duration': 200,
+ 'creator': '戴荃',
+ 'track': '悟空',
+ 'album': '悟空',
+ 'artist': '戴荃',
+ 'subtitles': {
+ 'origin': [{
+ 'ext': 'lrc',
+ }],
+ },
+ },
+ 'skip': 'Georestricted',
+ }, {
+ 'url': 'http://www.xiami.com/song/1775953850',
+ 'info_dict': {
+ 'id': '1775953850',
+ 'ext': 'mp3',
+ 'title': 'До Скону - Чума Пожирает Землю',
+ 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
+ 'duration': 683,
+ 'creator': 'До Скону',
+ 'track': 'Чума Пожирает Землю',
+ 'track_number': 7,
+ 'album': 'Ад',
+ 'artist': 'До Скону',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.xiami.com/song/xLHGwgd07a1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self._extract_tracks(self._match_id(url), url)[0]
+
+
+class XiamiPlaylistBaseIE(XiamiBaseIE):
+ def _real_extract(self, url):
+ item_id = self._match_id(url)
+ return self.playlist_result(self._extract_tracks(item_id, url, self._TYPE), item_id)
+
+
+class XiamiAlbumIE(XiamiPlaylistBaseIE):
+ IE_NAME = 'xiami:album'
+ IE_DESC = '虾米音乐 - 专辑'
+ _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[^/?#&]+)'
+ _TYPE = '1'
+ _TESTS = [{
+ 'url': 'http://www.xiami.com/album/2100300444',
+ 'info_dict': {
+ 'id': '2100300444',
+ },
+ 'playlist_count': 10,
+ 'skip': 'Georestricted',
+ }, {
+ 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.xiami.com/album/URVDji2a506',
+ 'only_matching': True,
+ }]
+
+
+class XiamiArtistIE(XiamiPlaylistBaseIE):
+ IE_NAME = 'xiami:artist'
+ IE_DESC = '虾米音乐 - 歌手'
+ _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[^/?#&]+)'
+ _TYPE = '2'
+ _TESTS = [{
+ 'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
+ 'info_dict': {
+ 'id': '2132',
+ },
+ 'playlist_count': 20,
+ 'skip': 'Georestricted',
+ }, {
+ 'url': 'http://www.xiami.com/artist/bC5Tk2K6eb99',
+ 'only_matching': True,
+ }]
+
+
+class XiamiCollectionIE(XiamiPlaylistBaseIE):
+ IE_NAME = 'xiami:collection'
+ IE_DESC = '虾米音乐 - 精选集'
+ _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[^/?#&]+)'
+ _TYPE = '3'
+ _TEST = {
+ 'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
+ 'info_dict': {
+ 'id': '156527391',
+ },
+ 'playlist_mincount': 29,
+ 'skip': 'Georestricted',
+ }
diff --git a/hypervideo_dl/extractor/ximalaya.py b/hypervideo_dl/extractor/ximalaya.py
new file mode 100644
index 0000000..a912e54
--- /dev/null
+++ b/hypervideo_dl/extractor/ximalaya.py
@@ -0,0 +1,233 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+
+
+class XimalayaBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['CN']
+
+
+class XimalayaIE(XimalayaBaseIE):
+ IE_NAME = 'ximalaya'
+ IE_DESC = '喜马拉雅FM'
+ _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?P<uid>[0-9]+)/sound/(?P<id>[0-9]+)'
+ _USER_URL_FORMAT = '%s://www.ximalaya.com/zhubo/%i/'
+ _TESTS = [
+ {
+ 'url': 'http://www.ximalaya.com/61425525/sound/47740352/',
+ 'info_dict': {
+ 'id': '47740352',
+ 'ext': 'm4a',
+ 'uploader': '小彬彬爱听书',
+ 'uploader_id': 61425525,
+ 'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/',
+ 'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白',
+ 'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。",
+ 'thumbnails': [
+ {
+ 'name': 'cover_url',
+ 'url': r're:^https?://.*\.jpg$',
+ },
+ {
+ 'name': 'cover_url_142',
+ 'url': r're:^https?://.*\.jpg$',
+ 'width': 180,
+ 'height': 180
+ }
+ ],
+ 'categories': ['renwen', '人文'],
+ 'duration': 93,
+ 'view_count': int,
+ 'like_count': int,
+ }
+ },
+ {
+ 'url': 'http://m.ximalaya.com/61425525/sound/47740352/',
+ 'info_dict': {
+ 'id': '47740352',
+ 'ext': 'm4a',
+ 'uploader': '小彬彬爱听书',
+ 'uploader_id': 61425525,
+ 'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/',
+ 'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白',
+ 'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。",
+ 'thumbnails': [
+ {
+ 'name': 'cover_url',
+ 'url': r're:^https?://.*\.jpg$',
+ },
+ {
+ 'name': 'cover_url_142',
+ 'url': r're:^https?://.*\.jpg$',
+ 'width': 180,
+ 'height': 180
+ }
+ ],
+ 'categories': ['renwen', '人文'],
+ 'duration': 93,
+ 'view_count': int,
+ 'like_count': int,
+ }
+ },
+ {
+ 'url': 'https://www.ximalaya.com/11045267/sound/15705996/',
+ 'info_dict': {
+ 'id': '15705996',
+ 'ext': 'm4a',
+ 'uploader': '李延隆老师',
+ 'uploader_id': 11045267,
+ 'uploader_url': 'https://www.ximalaya.com/zhubo/11045267/',
+ 'title': 'Lesson 1 Excuse me!',
+ 'description': "contains:Listen to the tape then answer\xa0this question. Whose handbag is it?\n"
+ "听录音,然后回答问题,这是谁的手袋?",
+ 'thumbnails': [
+ {
+ 'name': 'cover_url',
+ 'url': r're:^https?://.*\.jpg$',
+ },
+ {
+ 'name': 'cover_url_142',
+ 'url': r're:^https?://.*\.jpg$',
+ 'width': 180,
+ 'height': 180
+ }
+ ],
+ 'categories': ['train', '外语'],
+ 'duration': 40,
+ 'view_count': int,
+ 'like_count': int,
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+
+ is_m = 'm.ximalaya' in url
+ scheme = 'https' if url.startswith('https') else 'http'
+
+ audio_id = self._match_id(url)
+ webpage = self._download_webpage(url, audio_id,
+ note='Downloading sound page for %s' % audio_id,
+ errnote='Unable to get sound page')
+
+ audio_info_file = '%s://m.ximalaya.com/tracks/%s.json' % (scheme, audio_id)
+ audio_info = self._download_json(audio_info_file, audio_id,
+ 'Downloading info json %s' % audio_info_file,
+ 'Unable to download info file')
+
+ formats = []
+ for bps, k in (('24k', 'play_path_32'), ('64k', 'play_path_64')):
+ if audio_info.get(k):
+ formats.append({
+ 'format_id': bps,
+ 'url': audio_info[k],
+ })
+
+ thumbnails = []
+ for k in audio_info.keys():
+ # cover pic keys look like 'cover_url', 'cover_url_142'
+ if k.startswith('cover_url'):
+ thumbnail = {'name': k, 'url': audio_info[k]}
+ if k == 'cover_url_142':
+ thumbnail['width'] = 180
+ thumbnail['height'] = 180
+ thumbnails.append(thumbnail)
+
+ audio_uploader_id = audio_info.get('uid')
+
+ if is_m:
+ audio_description = self._html_search_regex(r'(?s)<section\s+class=["\']content[^>]+>(.+?)</section>',
+ webpage, 'audio_description', fatal=False)
+ else:
+ audio_description = self._html_search_regex(r'(?s)<div\s+class=["\']rich_intro[^>]*>(.+?</article>)',
+ webpage, 'audio_description', fatal=False)
+
+ if not audio_description:
+ audio_description_file = '%s://www.ximalaya.com/sounds/%s/rich_intro' % (scheme, audio_id)
+ audio_description = self._download_webpage(audio_description_file, audio_id,
+ note='Downloading description file %s' % audio_description_file,
+ errnote='Unable to download description file',
+ fatal=False)
+ audio_description = audio_description.strip() if audio_description else None
+
+ return {
+ 'id': audio_id,
+ 'uploader': audio_info.get('nickname'),
+ 'uploader_id': audio_uploader_id,
+ 'uploader_url': self._USER_URL_FORMAT % (scheme, audio_uploader_id) if audio_uploader_id else None,
+ 'title': audio_info['title'],
+ 'thumbnails': thumbnails,
+ 'description': audio_description,
+ 'categories': list(filter(None, (audio_info.get('category_name'), audio_info.get('category_title')))),
+ 'duration': audio_info.get('duration'),
+ 'view_count': audio_info.get('play_count'),
+ 'like_count': audio_info.get('favorites_count'),
+ 'formats': formats,
+ }
+
+
+class XimalayaAlbumIE(XimalayaBaseIE):
+ IE_NAME = 'ximalaya:album'
+ IE_DESC = '喜马拉雅FM 专辑'
+ _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?P<uid>[0-9]+)/album/(?P<id>[0-9]+)'
+ _TEMPLATE_URL = '%s://www.ximalaya.com/%s/album/%s/'
+ _BASE_URL_TEMPL = '%s://www.ximalaya.com%s'
+ _LIST_VIDEO_RE = r'<a[^>]+?href="(?P<url>/%s/sound/(?P<id>\d+)/?)"[^>]+?title="(?P<title>[^>]+)">'
+ _TESTS = [{
+ 'url': 'http://www.ximalaya.com/61425525/album/5534601/',
+ 'info_dict': {
+ 'title': '唐诗三百首(含赏析)',
+ 'id': '5534601',
+ },
+ 'playlist_count': 312,
+ }, {
+ 'url': 'http://m.ximalaya.com/61425525/album/5534601',
+ 'info_dict': {
+ 'title': '唐诗三百首(含赏析)',
+ 'id': '5534601',
+ },
+ 'playlist_count': 312,
+ },
+ ]
+
+ def _real_extract(self, url):
+ self.scheme = scheme = 'https' if url.startswith('https') else 'http'
+
+ mobj = re.match(self._VALID_URL, url)
+ uid, playlist_id = mobj.group('uid'), mobj.group('id')
+
+ webpage = self._download_webpage(self._TEMPLATE_URL % (scheme, uid, playlist_id), playlist_id,
+ note='Downloading album page for %s' % playlist_id,
+ errnote='Unable to get album info')
+
+ title = self._html_search_regex(r'detailContent_title[^>]*><h1(?:[^>]+)?>([^<]+)</h1>',
+ webpage, 'title', fatal=False)
+
+ return self.playlist_result(self._entries(webpage, playlist_id, uid), playlist_id, title)
+
+ def _entries(self, page, playlist_id, uid):
+ html = page
+ for page_num in itertools.count(1):
+ for entry in self._process_page(html, uid):
+ yield entry
+
+ next_url = self._search_regex(r'<a\s+href=(["\'])(?P<more>[\S]+)\1[^>]+rel=(["\'])next\3',
+ html, 'list_next_url', default=None, group='more')
+ if not next_url:
+ break
+
+ next_full_url = self._BASE_URL_TEMPL % (self.scheme, next_url)
+ html = self._download_webpage(next_full_url, playlist_id)
+
+ def _process_page(self, html, uid):
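+ # restrict scanning to the markup after the album_soundlist marker so
+ # navigation and sidebar links are not picked up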
+ find_from = html.index('album_soundlist')
+ for mobj in re.finditer(self._LIST_VIDEO_RE % uid, html[find_from:]):
+ yield self.url_result(self._BASE_URL_TEMPL % (self.scheme, mobj.group('url')),
+ XimalayaIE.ie_key(),
+ mobj.group('id'),
+ mobj.group('title'))
diff --git a/hypervideo_dl/extractor/xminus.py b/hypervideo_dl/extractor/xminus.py
new file mode 100644
index 0000000..36e5ead
--- /dev/null
+++ b/hypervideo_dl/extractor/xminus.py
@@ -0,0 +1,79 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_ord,
+)
+from ..utils import (
+ int_or_none,
+ parse_duration,
+)
+
+
+class XMinusIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?x-minus\.org/track/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://x-minus.org/track/4542/%D0%BF%D0%B5%D1%81%D0%B5%D0%BD%D0%BA%D0%B0-%D1%88%D0%BE%D1%84%D0%B5%D1%80%D0%B0.html',
+ 'md5': '401a15f2d2dcf6d592cb95528d72a2a8',
+ 'info_dict': {
+ 'id': '4542',
+ 'ext': 'mp3',
+ 'title': 'Леонид Агутин-Песенка шофёра',
+ 'duration': 156,
+ 'tbr': 320,
+ 'filesize_approx': 5900000,
+ 'view_count': int,
+ 'description': 'md5:03238c5b663810bc79cf42ef3c03e371',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ artist = self._html_search_regex(
+ r'<a[^>]+href="/artist/\d+">([^<]+)</a>', webpage, 'artist')
+ title = artist + '-' + self._html_search_regex(
+ r'<span[^>]+class="minustrack-full-title(?:\s+[^"]+)?"[^>]*>([^<]+)', webpage, 'title')
+ duration = parse_duration(self._html_search_regex(
+ r'<span[^>]+class="player-duration(?:\s+[^"]+)?"[^>]*>([^<]+)',
+ webpage, 'duration', fatal=False))
+ mobj = re.search(
+ r'<div[^>]+class="dw-info(?:\s+[^"]+)?"[^>]*>(?P<tbr>\d+)\s*кбит/c\s+(?P<filesize>[0-9.]+)\s*мб</div>',
+ webpage)
+ tbr = filesize_approx = None
+ if mobj:
+ filesize_approx = float(mobj.group('filesize')) * 1000000
+ tbr = float(mobj.group('tbr'))
+ view_count = int_or_none(self._html_search_regex(
+ r'<span><[^>]+class="icon-chart-bar".*?>(\d+)</span>',
+ webpage, 'view count', fatal=False))
+ description = self._html_search_regex(
+ r'(?s)<pre[^>]+id="lyrics-original"[^>]*>(.*?)</pre>',
+ webpage, 'song lyrics', fatal=False)
+ if description:
+ description = re.sub(' *\r *', '\n', description)
+
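+ # reimplementation of the player's download token as captured in this
+ # snapshot: sum of the code points of data-k, plus the track id, plus
+ # the current hour since epoch, both truncated into the tkn2 parameter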
+ k = self._search_regex(
+ r'<div[^>]+id="player-bottom"[^>]+data-k="([^"]+)">', webpage,
+ 'encoded data')
+ h = time.time() / 3600
+ a = sum(map(int, [compat_ord(c) for c in k])) + int(video_id) + h
+ video_url = 'http://x-minus.me/dl/minus?id=%s&tkn2=%df%d' % (video_id, a, h)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ # The extension is unknown until actual downloading
+ 'ext': 'mp3',
+ 'duration': duration,
+ 'filesize_approx': filesize_approx,
+ 'tbr': tbr,
+ 'view_count': view_count,
+ 'description': description,
+ }
diff --git a/hypervideo_dl/extractor/xnxx.py b/hypervideo_dl/extractor/xnxx.py
new file mode 100644
index 0000000..ac1ccc4
--- /dev/null
+++ b/hypervideo_dl/extractor/xnxx.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ NO_DEFAULT,
+ str_to_int,
+)
+
+
+class XNXXIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/'
+ _TESTS = [{
+ 'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video',
+ 'md5': '7583e96c15c0f21e9da3453d9920fbba',
+ 'info_dict': {
+ 'id': '55awb78',
+ 'ext': 'mp4',
+ 'title': 'Skyrim Test Video',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 469,
+ 'view_count': int,
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.xnxx.com/video-55awb78/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
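+ # page metadata lives in html5player setter calls such as
+ # setVideoTitle('...'); pull the quoted argument for a given setter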
+ def get(meta, default=NO_DEFAULT, fatal=True):
+ return self._search_regex(
+ r'set%s\s*\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % meta,
+ webpage, meta, default=default, fatal=fatal, group='value')
+
+ title = self._og_search_title(
+ webpage, default=None) or get('VideoTitle')
+
+ formats = []
+ for mobj in re.finditer(
+ r'setVideo(?:Url(?P<id>Low|High)|HLS)\s*\(\s*(?P<q>["\'])(?P<url>(?:https?:)?//.+?)(?P=q)', webpage):
+ format_url = mobj.group('url')
+ if determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ preference=1, m3u8_id='hls', fatal=False))
+ else:
+ format_id = mobj.group('id')
+ if format_id:
+ format_id = format_id.lower()
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'quality': -1 if format_id == 'low' else 0,
+ })
+ self._sort_formats(formats)
+
+ thumbnail = self._og_search_thumbnail(webpage, default=None) or get(
+ 'ThumbUrl', fatal=False) or get('ThumbUrl169', fatal=False)
+ duration = int_or_none(self._og_search_property('duration', webpage))
+ view_count = str_to_int(self._search_regex(
+ r'id=["\']nb-views-number[^>]+>([\d,.]+)', webpage, 'view count',
+ default=None))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'age_limit': 18,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/xstream.py b/hypervideo_dl/extractor/xstream.py
new file mode 100644
index 0000000..76c91bd
--- /dev/null
+++ b/hypervideo_dl/extractor/xstream.py
@@ -0,0 +1,119 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ xpath_with_ns,
+ xpath_text,
+ find_xpath_attr,
+)
+
+
+class XstreamIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ xstream:|
+ https?://frontend\.xstream\.(?:dk|net)/
+ )
+ (?P<partner_id>[^/]+)
+ (?:
+ :|
+ /feed/video/\?.*?\bid=
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588',
+ 'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
+ 'info_dict': {
+ 'id': '86588',
+ 'ext': 'mov',
+ 'title': 'Otto Wollertsen',
+ 'description': 'Vestlendingen Otto Fredrik Wollertsen',
+ 'timestamp': 1430473209,
+ 'upload_date': '20150501',
+ },
+ }, {
+ 'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039',
+ 'only_matching': True,
+ }]
+
+ def _extract_video_info(self, partner_id, video_id):
+ data = self._download_xml(
+ 'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
+ % (partner_id, video_id),
+ video_id)
+
+ NS_MAP = {
+ 'atom': 'http://www.w3.org/2005/Atom',
+ 'xt': 'http://xstream.dk/',
+ 'media': 'http://search.yahoo.com/mrss/',
+ }
+
+ entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
+
+ title = xpath_text(
+ entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
+ description = xpath_text(
+ entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
+ timestamp = parse_iso8601(xpath_text(
+ entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
+
+ formats = []
+ media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
+ for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
+ media_url = media_content.get('url')
+ if not media_url:
+ continue
+ tbr = int_or_none(media_content.get('bitrate'))
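+ # rtmp://host/app/playpath URLs must be split so the RTMP downloader
+ # gets the base URL, app and play path separately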
+ mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
+ if mobj:
+ formats.append({
+ 'url': mobj.group('url'),
+ 'play_path': 'mp4:%s' % mobj.group('playpath'),
+ 'app': mobj.group('app'),
+ 'ext': 'flv',
+ 'tbr': tbr,
+ 'format_id': 'rtmp-%d' % tbr if tbr else 'rtmp',
+ })
+ else:
+ formats.append({
+ 'url': media_url,
+ 'tbr': tbr,
+ })
+ self._sort_formats(formats)
+
+ link = find_xpath_attr(
+ entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
+ if link is not None:
+ formats.append({
+ 'url': link.get('href'),
+ 'format_id': link.get('rel'),
+ 'preference': 1,
+ })
+
+ thumbnails = [{
+ 'url': splash.get('url'),
+ 'width': int_or_none(splash.get('width')),
+ 'height': int_or_none(splash.get('height')),
+ } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ partner_id = mobj.group('partner_id')
+ video_id = mobj.group('id')
+
+ return self._extract_video_info(partner_id, video_id)
diff --git a/hypervideo_dl/extractor/xtube.py b/hypervideo_dl/extractor/xtube.py
new file mode 100644
index 0000000..7246409
--- /dev/null
+++ b/hypervideo_dl/extractor/xtube.py
@@ -0,0 +1,233 @@
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ orderedSet,
+ parse_duration,
+ sanitized_Request,
+ str_to_int,
+ url_or_none,
+)
+
+
+class XTubeIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ xtube:|
+ https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?:embedded/)?(?P<display_id>[^/]+)-)
+ )
+ (?P<id>[^/?&#]+)
+ '''
+
+ _TESTS = [{
+ # old URL schema
+ 'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
+ 'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
+ 'info_dict': {
+ 'id': 'kVTUy_G222_',
+ 'ext': 'mp4',
+ 'title': 'strange erotica',
+ 'description': 'contains:an ET kind of thing',
+ 'uploader': 'greenshowers',
+ 'duration': 450,
+ 'view_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ }
+ }, {
+ # FLV videos with duplicated formats
+ 'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752',
+ 'md5': 'a406963eb349dd43692ec54631efd88b',
+ 'info_dict': {
+ 'id': '9299752',
+ 'display_id': 'A-Super-Run-Part-1-YT',
+ 'ext': 'flv',
+ 'title': 'A Super Run - Part 1 (YT)',
+ 'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
+ 'uploader': 'tshirtguy59',
+ 'duration': 579,
+ 'view_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ },
+ }, {
+ # new URL schema
+ 'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
+ 'only_matching': True,
+ }, {
+ 'url': 'xtube:625837',
+ 'only_matching': True,
+ }, {
+ 'url': 'xtube:kVTUy_G222_',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.xtube.com/video-watch/embedded/milf-tara-and-teen-shared-and-cum-covered-extreme-bukkake-32203482?embedsize=big',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ if not display_id:
+ display_id = video_id
+
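+ # short numeric ids belong to the new /video-watch/ schema; anything
+ # else goes through the legacy watch.php endpoint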
+ if video_id.isdigit() and len(video_id) < 11:
+ url_pattern = 'http://www.xtube.com/video-watch/-%s'
+ else:
+ url_pattern = 'http://www.xtube.com/watch.php?v=%s'
+
+ webpage = self._download_webpage(
+ url_pattern % video_id, display_id, headers={
+ 'Cookie': 'age_verified=1; cookiesAccepted=1',
+ })
+
+ title, thumbnail, duration, sources, media_definition = [None] * 5
+
+ config = self._parse_json(self._search_regex(
+ r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf|playerWrapper)', webpage, 'config',
+ default='{}'), video_id, transform_source=js_to_json, fatal=False)
+ if config:
+ config = config.get('mainRoll')
+ if isinstance(config, dict):
+ title = config.get('title')
+ thumbnail = config.get('poster')
+ duration = int_or_none(config.get('duration'))
+ sources = config.get('sources') or config.get('format')
+ media_definition = config.get('mediaDefinition')
+
+ if not isinstance(sources, dict) and not media_definition:
+ sources = self._parse_json(self._search_regex(
+ r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
+ webpage, 'sources', group='sources'), video_id,
+ transform_source=js_to_json)
+
+ formats = []
+ format_urls = set()
+
+ if isinstance(sources, dict):
+ for format_id, format_url in sources.items():
+ format_url = url_or_none(format_url)
+ if not format_url:
+ continue
+ if format_url in format_urls:
+ continue
+ format_urls.add(format_url)
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'height': int_or_none(format_id),
+ })
+
+ if isinstance(media_definition, list):
+ for media in media_definition:
+ video_url = url_or_none(media.get('videoUrl'))
+ if not video_url:
+ continue
+ if video_url in format_urls:
+ continue
+ format_urls.add(video_url)
+ format_id = media.get('format')
+ if format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif format_id == 'mp4':
+ height = int_or_none(media.get('quality'))
+ formats.append({
+ 'url': video_url,
+ 'format_id': '%s-%d' % (format_id, height) if height else format_id,
+ 'height': height,
+ })
+
+ self._remove_duplicate_formats(formats)
+ self._sort_formats(formats)
+
+ if not title:
+ title = self._search_regex(
+ (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
+ webpage, 'title', group='title')
+ description = self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:description', webpage, default=None) or self._search_regex(
+ r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
+ uploader = self._search_regex(
+ (r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
+ r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
+ webpage, 'uploader', fatal=False)
+ if not duration:
+ duration = parse_duration(self._search_regex(
+ r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
+ webpage, 'duration', fatal=False))
+ view_count = str_to_int(self._search_regex(
+ (r'["\']viewsCount["\'][^>]*>(\d+)\s+views',
+ r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>'),
+ webpage, 'view count', fatal=False))
+ comment_count = str_to_int(self._html_search_regex(
+ r'>Comments? \(([\d,\.]+)\)<',
+ webpage, 'comment count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'age_limit': 18,
+ 'formats': formats,
+ }
+
+
+class XTubeUserIE(InfoExtractor):
+ IE_DESC = 'XTube user profile'
+ _VALID_URL = r'https?://(?:www\.)?xtube\.com/profile/(?P<id>[^/]+-\d+)'
+ _TEST = {
+ 'url': 'http://www.xtube.com/profile/greenshowers-4056496',
+ 'info_dict': {
+ 'id': 'greenshowers-4056496',
+ 'age_limit': 18,
+ },
+ 'playlist_mincount': 154,
+ }
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+
+ entries = []
+ for pagenum in itertools.count(1):
+ request = sanitized_Request(
+ 'http://www.xtube.com/profile/%s/videos/%d' % (user_id, pagenum),
+ headers={
+ 'Cookie': 'popunder=4',
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Referer': url,
+ })
+
+ page = self._download_json(
+ request, user_id, 'Downloading videos JSON page %d' % pagenum)
+
+ html = page.get('html')
+ if not html:
+ break
+
+ for video_id in orderedSet([video_id for _, video_id in re.findall(
+ r'data-plid=(["\'])(.+?)\1', html)]):
+ entries.append(self.url_result('xtube:%s' % video_id, XTubeIE.ie_key()))
+
+ page_count = int_or_none(page.get('pageCount'))
+ if not page_count or pagenum == page_count:
+ break
+
+ playlist = self.playlist_result(entries, user_id)
+ playlist['age_limit'] = 18
+ return playlist
diff --git a/hypervideo_dl/extractor/xuite.py b/hypervideo_dl/extractor/xuite.py
new file mode 100644
index 0000000..0276c0d
--- /dev/null
+++ b/hypervideo_dl/extractor/xuite.py
@@ -0,0 +1,153 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ get_element_by_attribute,
+ parse_iso8601,
+ remove_end,
+)
+
+
+class XuiteIE(InfoExtractor):
+ IE_DESC = '隨意窩Xuite影音'
+ _REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?'
+ _VALID_URL = r'https?://vlog\.xuite\.net/(?:play|embed)/(?P<id>%s)' % _REGEX_BASE64
+ _TESTS = [{
+ # Audio
+ 'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2',
+ 'md5': 'e79284c87b371424885448d11f6398c8',
+ 'info_dict': {
+ 'id': '3860914',
+ 'ext': 'mp3',
+ 'title': '孤單南半球-歐德陽',
+ 'description': '孤單南半球-歐德陽',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 247.246,
+ 'timestamp': 1314932940,
+ 'upload_date': '20110902',
+ 'uploader': '阿能',
+ 'uploader_id': '15973816',
+ 'categories': ['個人短片'],
+ },
+ }, {
+ # Video with only one format
+ 'url': 'http://vlog.xuite.net/play/WUxxR2xCLTI1OTI1MDk5LmZsdg==',
+ 'md5': '21f7b39c009b5a4615b4463df6eb7a46',
+ 'info_dict': {
+ 'id': '25925099',
+ 'ext': 'mp4',
+ 'title': 'BigBuckBunny_320x180',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 596.458,
+ 'timestamp': 1454242500,
+ 'upload_date': '20160131',
+ 'uploader': '屁姥',
+ 'uploader_id': '12158353',
+ 'categories': ['個人短片'],
+ 'description': 'http://download.blender.org/peach/bigbuckbunny_movies/BigBuckBunny_320x180.mp4',
+ },
+ }, {
+ # Video with two formats
+ 'url': 'http://vlog.xuite.net/play/bWo1N1pLLTIxMzAxMTcwLmZsdg==',
+ 'md5': '1166e0f461efe55b62e26a2d2a68e6de',
+ 'info_dict': {
+ 'id': '21301170',
+ 'ext': 'mp4',
+ 'title': '暗殺教室 02',
+ 'description': '字幕:【極影字幕社】',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1384.907,
+ 'timestamp': 1421481240,
+ 'upload_date': '20150117',
+ 'uploader': '我只是想認真點',
+ 'uploader_id': '242127761',
+ 'categories': ['電玩動漫'],
+ },
+ 'skip': 'Video removed',
+ }, {
+ # Video with encoded media id
+ # from http://forgetfulbc.blogspot.com/2016/06/date.html
+ 'url': 'http://vlog.xuite.net/embed/cE1xbENoLTI3NDQ3MzM2LmZsdg==?ar=0&as=0',
+ 'info_dict': {
+ 'id': '27447336',
+ 'ext': 'mp4',
+ 'title': '男女平權只是口號?專家解釋約會時男生是否該幫女生付錢 (中字)',
+ 'description': 'md5:1223810fa123b179083a3aed53574706',
+ 'timestamp': 1466160960,
+ 'upload_date': '20160617',
+ 'uploader': 'B.C. & Lowy',
+ 'uploader_id': '232279340',
+ },
+ }, {
+ 'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ # /play/ URLs provide embedded video URL and more metadata
+ url = url.replace('/embed/', '/play/')
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ error_msg = self._search_regex(
+ r'<div id="error-message-content">([^<]+)',
+ webpage, 'error message', default=None)
+ if error_msg:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error_msg),
+ expected=True)
+
+ media_info = self._parse_json(self._search_regex(
+ r'var\s+mediaInfo\s*=\s*({.*});', webpage, 'media info'), video_id)
+
+ video_id = media_info['MEDIA_ID']
+
+ formats = []
+ for key in ('html5Url', 'html5HQUrl'):
+ video_url = media_info.get(key)
+ if not video_url:
+ continue
+ format_id = self._search_regex(
+ r'\bq=(.+?)\b', video_url, 'format id', default=None)
+ formats.append({
+ 'url': video_url,
+ 'ext': 'mp4' if not format_id or format_id.isnumeric() else format_id,
+ 'format_id': format_id,
+ 'height': int(format_id) if format_id and format_id.isnumeric() else None,
+ })
+ self._sort_formats(formats)
+
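+ # PUBLISH_DATETIME is a naive 'YYYY-MM-DD hh:mm:ss' local time; append
+ # the site's UTC+8 offset before parsing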
+ timestamp = media_info.get('PUBLISH_DATETIME')
+ if timestamp:
+ timestamp = parse_iso8601(timestamp + ' +0800', ' ')
+
+ category = media_info.get('catName')
+ categories = [category] if category else []
+
+ uploader = media_info.get('NICKNAME')
+ uploader_url = None
+
+ author_div = get_element_by_attribute('itemprop', 'author', webpage)
+ if author_div:
+ uploader = uploader or self._html_search_meta('name', author_div)
+ uploader_url = self._html_search_regex(
+ r'<link[^>]+itemprop="url"[^>]+href="([^"]+)"', author_div,
+ 'uploader URL', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': media_info['TITLE'],
+ 'description': remove_end(media_info.get('metaDesc'), ' (Xuite 影音)'),
+ 'thumbnail': media_info.get('ogImageUrl'),
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'uploader_id': media_info.get('MEMBER_ID'),
+ 'uploader_url': uploader_url,
+ 'duration': float_or_none(media_info.get('MEDIA_DURATION'), 1000000),
+ 'categories': categories,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/xvideos.py b/hypervideo_dl/extractor/xvideos.py
new file mode 100644
index 0000000..8fc6491
--- /dev/null
+++ b/hypervideo_dl/extractor/xvideos.py
@@ -0,0 +1,147 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+)
+
+
+class XVideosIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:[^/]+\.)?xvideos2?\.com/video|
+ (?:www\.)?xvideos\.es/video|
+ flashservice\.xvideos\.com/embedframe/|
+ static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video=
+ )
+ (?P<id>[0-9]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
+ 'md5': '14cea69fcb84db54293b1e971466c2e1',
+ 'info_dict': {
+ 'id': '4588838',
+ 'ext': 'mp4',
+ 'title': 'Biker Takes his Girl',
+ 'duration': 108,
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'https://flashservice.xvideos.com/embedframe/4588838',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'https://xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'https://xvideos.es/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.xvideos.es/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'http://xvideos.es/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'http://www.xvideos.es/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'http://fr.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'https://fr.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'http://it.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'https://it.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'http://de.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'https://de.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://www.xvideos.com/video%s/' % video_id, video_id)
+
+ mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
+ if mobj:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
+
+ title = self._html_search_regex(
+ (r'<title>(?P<title>.+?)\s+-\s+XVID',
+ r'setVideoTitle\s*\(\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
+ webpage, 'title', default=None,
+ group='title') or self._og_search_title(webpage)
+
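+ # setThumbUrl() and setThumbUrl169() carry the default and (apparently
+ # 16:9) alternative posters; the latter is given higher preference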
+ thumbnails = []
+ for preference, thumbnail in enumerate(('', '169')):
+ thumbnail_url = self._search_regex(
+ r'setThumbUrl%s\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1' % thumbnail,
+ webpage, 'thumbnail', default=None, group='thumbnail')
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'preference': preference,
+ })
+
+ duration = int_or_none(self._og_search_property(
+ 'duration', webpage, default=None)) or parse_duration(
+ self._search_regex(
+ r'<span[^>]+class=["\']duration["\'][^>]*>.*?(\d[^<]+)',
+ webpage, 'duration', fatal=False))
+
+ formats = []
+
+ video_url = compat_urllib_parse_unquote(self._search_regex(
+ r'flv_url=(.+?)&', webpage, 'video URL', default=''))
+ if video_url:
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'flv',
+ })
+
+ for kind, _, format_url in re.findall(
+ r'setVideo([^(]+)\((["\'])(http.+?)\2\)', webpage):
+ format_id = kind.lower()
+ if format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+ elif format_id in ('urllow', 'urlhigh'):
+ formats.append({
+ 'url': format_url,
+ 'format_id': '%s-%s' % (determine_ext(format_url, 'mp4'), format_id[3:]),
+ 'quality': -2 if format_id.endswith('low') else None,
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'duration': duration,
+ 'thumbnails': thumbnails,
+ 'age_limit': 18,
+ }
diff --git a/hypervideo_dl/extractor/xxxymovies.py b/hypervideo_dl/extractor/xxxymovies.py
new file mode 100644
index 0000000..e34ebe3
--- /dev/null
+++ b/hypervideo_dl/extractor/xxxymovies.py
@@ -0,0 +1,81 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ int_or_none,
+)
+
+
+class XXXYMoviesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?xxxymovies\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)'
+ _TEST = {
+ 'url': 'http://xxxymovies.com/videos/138669/ecstatic-orgasm-sofcore/',
+ 'md5': '810b1bdbbffff89dd13bdb369fe7be4b',
+ 'info_dict': {
+ 'id': '138669',
+ 'display_id': 'ecstatic-orgasm-sofcore',
+ 'ext': 'mp4',
+ 'title': 'Ecstatic Orgasm Sofcore',
+ 'duration': 931,
+ 'categories': list,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_url = self._search_regex(
+ r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
+
+ title = self._html_search_regex(
+ [r'<div[^>]+\bclass="block_header"[^>]*>\s*<h1>([^<]+)<',
+ r'<title>(.*?)\s*-\s*(?:XXXYMovies\.com|XXX\s+Movies)</title>'],
+ webpage, 'title')
+
+ thumbnail = self._search_regex(
+ r"preview_url\s*:\s*'([^']+)'",
+ webpage, 'thumbnail', fatal=False)
+
+ categories = self._html_search_meta(
+ 'keywords', webpage, 'categories', default='').split(',')
+
+ duration = parse_duration(self._search_regex(
+ r'<span>Duration:</span>\s*(\d+:\d+)',
+ webpage, 'duration', fatal=False))
+
+ view_count = int_or_none(self._html_search_regex(
+ r'<div class="video_views">\s*(\d+)',
+ webpage, 'view count', fatal=False))
+ like_count = int_or_none(self._search_regex(
+ r'>\s*Likes? <b>\((\d+)\)',
+ webpage, 'like count', fatal=False))
+ dislike_count = int_or_none(self._search_regex(
+ r'>\s*Dislike <b>\((\d+)\)</b>',
+ webpage, 'dislike count', fatal=False))
+
+ age_limit = self._rta_search(webpage)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'categories': categories,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'age_limit': age_limit,
+ }
diff --git a/hypervideo_dl/extractor/yahoo.py b/hypervideo_dl/extractor/yahoo.py
new file mode 100644
index 0000000..a17b10d
--- /dev/null
+++ b/hypervideo_dl/extractor/yahoo.py
@@ -0,0 +1,569 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import itertools
+import re
+
+from .common import InfoExtractor, SearchInfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ mimetype2ext,
+ parse_iso8601,
+ smuggle_url,
+ try_get,
+ url_or_none,
+)
+
+from .brightcove import BrightcoveNewIE
+
+
+class YahooIE(InfoExtractor):
+ IE_DESC = 'Yahoo screen and movies'
+ _VALID_URL = r'(?P<url>https?://(?:(?P<country>[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P<id>[^?&#]*-[0-9]+(?:-[a-z]+)?)\.html)'
+ _TESTS = [{
+ 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
+ 'info_dict': {
+ 'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
+ 'ext': 'mp4',
+ 'title': 'Julian Smith & Travis Legg Watch Julian Smith',
+ 'description': 'Julian and Travis watch Julian Smith',
+ 'duration': 6863,
+ 'timestamp': 1369812016,
+ 'upload_date': '20130529',
+ },
+ }, {
+ 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
+ 'md5': '7993e572fac98e044588d0b5260f4352',
+ 'info_dict': {
+ 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
+ 'ext': 'mp4',
+ 'title': "Yahoo Saves 'Community'",
+ 'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
+ 'duration': 170,
+ 'timestamp': 1406838636,
+ 'upload_date': '20140731',
+ },
+ }, {
+ 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
+ 'md5': '71298482f7c64cbb7fa064e4553ff1c1',
+ 'info_dict': {
+ 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58',
+ 'ext': 'webm',
+ 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',
+ 'description': 'md5:f66c890e1490f4910a9953c941dee944',
+ 'duration': 97,
+ 'timestamp': 1414489862,
+ 'upload_date': '20141028',
+ }
+ }, {
+ 'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
+ 'md5': '88e209b417f173d86186bef6e4d1f160',
+ 'info_dict': {
+ 'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
+ 'ext': 'mp4',
+ 'title': 'China Moses Is Crazy About the Blues',
+ 'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
+ 'duration': 128,
+ 'timestamp': 1385722202,
+ 'upload_date': '20131129',
+ }
+ }, {
+ 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
+ 'md5': '2a9752f74cb898af5d1083ea9f661b58',
+ 'info_dict': {
+ 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
+ 'ext': 'mp4',
+ 'title': '\'True Story\' Trailer',
+ 'description': 'True Story',
+ 'duration': 150,
+ 'timestamp': 1418919206,
+ 'upload_date': '20141218',
+ },
+ }, {
+ 'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
+ 'only_matching': True,
+ }, {
+ 'note': 'NBC Sports embeds',
+ 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'flv',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ 'upload_date': '20150313',
+ 'uploader': 'NBCU-SPORTS',
+ 'timestamp': 1426270238,
+ },
+ }, {
+ 'url': 'https://tw.news.yahoo.com/-100120367.html',
+ 'only_matching': True,
+ }, {
+ # Query result is embedded in webpage, but explicit request to video API fails with geo restriction
+ 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
+ 'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
+ 'info_dict': {
+ 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
+ 'ext': 'mp4',
+ 'title': 'Communitary - Community Episode 1: Ladders',
+ 'description': 'md5:8fc39608213295748e1e289807838c97',
+ 'duration': 1646,
+ 'timestamp': 1440436550,
+ 'upload_date': '20150824',
+ 'series': 'Communitary',
+ 'season_number': 6,
+ 'episode_number': 1,
+ },
+ }, {
+ # ytwnews://cavideo/
+ 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
+ 'info_dict': {
+ 'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
+ 'ext': 'mp4',
+ 'title': '單車天使 - 中文版預',
+ 'description': '中文版預',
+ 'timestamp': 1476696196,
+ 'upload_date': '20161017',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # Contains both a Yahoo hosted video and multiple Youtube embeds
+ 'url': 'https://www.yahoo.com/entertainment/gwen-stefani-reveals-the-pop-hit-she-passed-on-assigns-it-to-her-voice-contestant-instead-033045672.html',
+ 'info_dict': {
+ 'id': '46c5d95a-528f-3d03-b732-732fcadd51de',
+ 'title': 'Gwen Stefani reveals the pop hit she passed on, assigns it to her \'Voice\' contestant instead',
+ 'description': 'Gwen decided not to record this hit herself, but she decided it was the perfect fit for Kyndall Inskeep.',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '966d4262-4fd1-3aaa-b45b-049ca6e38ba6',
+ 'ext': 'mp4',
+ 'title': 'Gwen Stefani reveals she turned down one of Sia\'s best songs',
+ 'description': 'On "The Voice" Tuesday, Gwen Stefani told Taylor Swift which Sia hit was almost hers.',
+ 'timestamp': 1572406500,
+ 'upload_date': '20191030',
+ },
+ }, {
+ 'info_dict': {
+ 'id': '352CFDOQrKg',
+ 'ext': 'mp4',
+ 'title': 'Kyndal Inskeep "Performs the Hell Out of" Sia\'s "Elastic Heart" - The Voice Knockouts 2019',
+ 'description': 'md5:35b61e94c2ae214bc965ff4245f80d11',
+ 'uploader': 'The Voice',
+ 'uploader_id': 'NBCTheVoice',
+ 'upload_date': '20191029',
+ },
+ }],
+ 'params': {
+ 'playlistend': 2,
+ },
+ 'expected_warnings': ['HTTP Error 404'],
+ }, {
+ 'url': 'https://malaysia.news.yahoo.com/video/bystanders-help-ontario-policeman-bust-190932818.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://es-us.noticias.yahoo.com/es-la-puerta-irrompible-que-110539379.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.yahoo.com/entertainment/v/longtime-cbs-news-60-minutes-032036500-cbs.html',
+ 'only_matching': True,
+ }]
+
+ def _extract_yahoo_video(self, video_id, country):
+ video = self._download_json(
+ 'https://%s.yahoo.com/_td/api/resource/VideoService.videos;view=full;video_ids=["%s"]' % (country, video_id),
+ video_id, 'Downloading video JSON metadata')[0]
+ title = video['title']
+
+ if country == 'malaysia':
+ country = 'my'
+
+ is_live = video.get('live_state') == 'live'
+ fmts = ('m3u8',) if is_live else ('webm', 'mp4')
+
+ urls = []
+ formats = []
+ subtitles = {}
+ for fmt in fmts:
+ media_obj = self._download_json(
+ 'https://video-api.yql.yahoo.com/v1/video/sapi/streams/' + video_id,
+ video_id, 'Downloading %s JSON metadata' % fmt,
+ headers=self.geo_verification_headers(), query={
+ 'format': fmt,
+ 'region': country.upper(),
+ })['query']['results']['mediaObj'][0]
+ msg = media_obj.get('status', {}).get('msg')
+
+ for s in media_obj.get('streams', []):
+ host = s.get('host')
+ path = s.get('path')
+ if not host or not path:
+ continue
+ s_url = host + path
+ if s.get('format') == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ s_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ continue
+ tbr = int_or_none(s.get('bitrate'))
+ formats.append({
+ 'url': s_url,
+ 'format_id': fmt + ('-%d' % tbr if tbr else ''),
+ 'width': int_or_none(s.get('width')),
+ 'height': int_or_none(s.get('height')),
+ 'tbr': tbr,
+ 'fps': int_or_none(s.get('framerate')),
+ })
+
+ for cc in media_obj.get('closedcaptions', []):
+ cc_url = cc.get('url')
+ if not cc_url or cc_url in urls:
+ continue
+ urls.append(cc_url)
+ subtitles.setdefault(cc.get('lang') or 'en-US', []).append({
+ 'url': cc_url,
+ 'ext': mimetype2ext(cc.get('content_type')),
+ })
+
+ streaming_url = video.get('streaming_url')
+ if streaming_url and not is_live:
+ formats.extend(self._extract_m3u8_formats(
+ streaming_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ if not formats and msg == 'geo restricted':
+ self.raise_geo_restricted()
+
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for thumb in video.get('thumbnails', []):
+ thumb_url = thumb.get('url')
+ if not thumb_url:
+ continue
+ thumbnails.append({
+ 'id': thumb.get('tag'),
+                'url': thumb_url,
+ 'width': int_or_none(thumb.get('width')),
+ 'height': int_or_none(thumb.get('height')),
+ })
+
+ series_info = video.get('series_info') or {}
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': clean_html(video.get('description')),
+ 'timestamp': parse_iso8601(video.get('publish_time')),
+ 'subtitles': subtitles,
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('view_count')),
+ 'is_live': is_live,
+ 'series': video.get('show_name'),
+ 'season_number': int_or_none(series_info.get('season_number')),
+ 'episode_number': int_or_none(series_info.get('episode_number')),
+ }
+
+ def _real_extract(self, url):
+ url, country, display_id = re.match(self._VALID_URL, url).groups()
+ if not country:
+ country = 'us'
+ else:
+ country = country.split('-')[0]
+
+ item = self._download_json(
+ 'https://%s.yahoo.com/caas/content/article' % country, display_id,
+ 'Downloading content JSON metadata', query={
+ 'url': url
+ })['items'][0]['data']['partnerData']
+
+ if item.get('type') != 'video':
+ entries = []
+
+ cover = item.get('cover') or {}
+ if cover.get('type') == 'yvideo':
+ cover_url = cover.get('url')
+ if cover_url:
+ entries.append(self.url_result(
+ cover_url, 'Yahoo', cover.get('uuid')))
+
+ for e in (item.get('body') or []):
+ if e.get('type') == 'videoIframe':
+ iframe_url = e.get('url')
+ if not iframe_url:
+ continue
+ entries.append(self.url_result(iframe_url))
+
+ return self.playlist_result(
+ entries, item.get('uuid'),
+ item.get('title'), item.get('summary'))
+
+ info = self._extract_yahoo_video(item['uuid'], country)
+ info['display_id'] = display_id
+ return info
+
+
+class YahooSearchIE(SearchInfoExtractor):
+ IE_DESC = 'Yahoo screen search'
+ _MAX_RESULTS = 1000
+ IE_NAME = 'screen.yahoo:search'
+ _SEARCH_KEY = 'yvsearch'
+
+ def _get_n_results(self, query, n):
+ """Get a specified number of results for a query"""
+ entries = []
+ for pagenum in itertools.count(0):
+ result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
+ info = self._download_json(result_url, query,
+ note='Downloading results page ' + str(pagenum + 1))
+ m = info['m']
+ results = info['results']
+
+            for (i, r) in enumerate(results):
+                if (pagenum * 30) + i >= n:
+                    break
+                mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
+                if not mobj:
+                    # skip result entries without a recognizable video URL
+                    continue
+                e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
+                entries.append(e)
+ if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)):
+ break
+
+ return {
+ '_type': 'playlist',
+ 'id': query,
+ 'entries': entries,
+ }
+
+
+class YahooGyaOPlayerIE(InfoExtractor):
+ IE_NAME = 'yahoo:gyao:player'
+ _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:player|episode/[^/]+)|streaming\.yahoo\.co\.jp/c/y)/(?P<id>\d+/v\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'https://gyao.yahoo.co.jp/player/00998/v00818/v0000000000000008564/',
+ 'info_dict': {
+ 'id': '5993125228001',
+ 'ext': 'mp4',
+ 'title': 'フューリー 【字幕版】',
+ 'description': 'md5:21e691c798a15330eda4db17a8fe45a5',
+ 'uploader_id': '4235717419001',
+ 'upload_date': '20190124',
+ 'timestamp': 1548294365,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://streaming.yahoo.co.jp/c/y/01034/v00133/v0000000000000000706/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gyao.yahoo.co.jp/episode/%E3%81%8D%E3%81%AE%E3%81%86%E4%BD%95%E9%A3%9F%E3%81%B9%E3%81%9F%EF%BC%9F%20%E7%AC%AC2%E8%A9%B1%202019%2F4%2F12%E6%94%BE%E9%80%81%E5%88%86/5cb02352-b725-409e-9f8d-88f947a9f682',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).replace('/', ':')
+ headers = self.geo_verification_headers()
+ headers['Accept'] = 'application/json'
+ resp = self._download_json(
+ 'https://gyao.yahoo.co.jp/apis/playback/graphql', video_id, query={
+ 'appId': 'dj00aiZpPUNJeDh2cU1RazU3UCZzPWNvbnN1bWVyc2VjcmV0Jng9NTk-',
+ 'query': '''{
+ content(parameter: {contentId: "%s", logicaAgent: PC_WEB}) {
+ video {
+ delivery {
+ id
+ }
+ title
+ }
+ }
+}''' % video_id,
+ }, headers=headers)
+ content = resp['data']['content']
+ if not content:
+ msg = resp['errors'][0]['message']
+ if msg == 'not in japan':
+ self.raise_geo_restricted(countries=['JP'])
+ raise ExtractorError(msg)
+ video = content['video']
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': video['title'],
+ 'url': smuggle_url(
+ 'http://players.brightcove.net/4235717419001/SyG5P0gjb_default/index.html?videoId=' + video['delivery']['id'],
+ {'geo_countries': ['JP']}),
+ 'ie_key': BrightcoveNewIE.ie_key(),
+ }
+
+
+class YahooGyaOIE(InfoExtractor):
+ IE_NAME = 'yahoo:gyao'
+ _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title(?:/[^/]+)?)|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/',
+ 'info_dict': {
+ 'id': '00449:v03102',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'https://streaming.yahoo.co.jp/p/y/01034/v00133/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gyao.yahoo.co.jp/title/%E3%81%97%E3%82%83%E3%81%B9%E3%81%8F%E3%82%8A007/5b025a49-b2e5-4dc7-945c-09c6634afacf',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gyao.yahoo.co.jp/title/5b025a49-b2e5-4dc7-945c-09c6634afacf',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ program_id = self._match_id(url).replace('/', ':')
+ videos = self._download_json(
+ 'https://gyao.yahoo.co.jp/api/programs/%s/videos' % program_id, program_id)['videos']
+ entries = []
+ for video in videos:
+ video_id = video.get('id')
+ if not video_id:
+ continue
+ entries.append(self.url_result(
+ 'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'),
+ YahooGyaOPlayerIE.ie_key(), video_id))
+ return self.playlist_result(entries, program_id)
+
+
+class YahooJapanNewsIE(InfoExtractor):
+ IE_NAME = 'yahoo:japannews'
+ IE_DESC = 'Yahoo! Japan News'
+ _VALID_URL = r'https?://(?P<host>(?:news|headlines)\.yahoo\.co\.jp)[^\d]*(?P<id>\d[\d-]*\d)?'
+ _GEO_COUNTRIES = ['JP']
+ _TESTS = [{
+ 'url': 'https://headlines.yahoo.co.jp/videonews/ann?a=20190716-00000071-ann-int',
+ 'info_dict': {
+ 'id': '1736242',
+ 'ext': 'mp4',
+ 'title': 'ムン大統領が対日批判を強化“現金化”効果は?(テレビ朝日系(ANN)) - Yahoo!ニュース',
+ 'description': '韓国の元徴用工らを巡る裁判の原告が弁護士が差し押さえた三菱重工業の資産を売却して - Yahoo!ニュース(テレビ朝日系(ANN))',
+ 'thumbnail': r're:^https?://.*\.[a-zA-Z\d]{3,4}$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # geo restricted
+ 'url': 'https://headlines.yahoo.co.jp/hl?a=20190721-00000001-oxv-l04',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://headlines.yahoo.co.jp/videonews/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://news.yahoo.co.jp',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://news.yahoo.co.jp/byline/hashimotojunji/20190628-00131977/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://news.yahoo.co.jp/feature/1356',
+ 'only_matching': True
+ }]
+
+ def _extract_formats(self, json_data, content_id):
+ formats = []
+
+ video_data = try_get(
+ json_data,
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list)
+ for vid in video_data or []:
+ delivery = vid.get('delivery')
+ url = url_or_none(vid.get('Url'))
+ if not delivery or not url:
+ continue
+            if delivery == 'hls':
+ formats.extend(
+ self._extract_m3u8_formats(
+ url, content_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': url,
+ 'format_id': 'http-%s' % compat_str(vid.get('bitrate', '')),
+ 'height': int_or_none(vid.get('height')),
+ 'width': int_or_none(vid.get('width')),
+ 'tbr': int_or_none(vid.get('bitrate')),
+ })
+ self._remove_duplicate_formats(formats)
+ self._sort_formats(formats)
+
+ return formats
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ display_id = mobj.group('id') or host
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._html_search_meta(
+ ['og:title', 'twitter:title'], webpage, 'title', default=None
+ ) or self._html_search_regex('<title>([^<]+)</title>', webpage, 'title')
+
+ if display_id == host:
+ # Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...)
+ stream_plists = re.findall(r'plist=(\d+)', webpage) or re.findall(r'plist["\']:\s*["\']([^"\']+)', webpage)
+ entries = [
+ self.url_result(
+ smuggle_url(
+ 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=%s' % plist_id,
+ {'geo_countries': ['JP']}),
+ ie='BrightcoveNew', video_id=plist_id)
+ for plist_id in stream_plists]
+ return self.playlist_result(entries, playlist_title=title)
+
+ # Article page
+ description = self._html_search_meta(
+ ['og:description', 'description', 'twitter:description'],
+ webpage, 'description', default=None)
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:image', webpage, 'thumbnail', default=None)
+ space_id = self._search_regex([
+ r'<script[^>]+class=["\']yvpub-player["\'][^>]+spaceid=([^&"\']+)',
+ r'YAHOO\.JP\.srch\.\w+link\.onLoad[^;]+spaceID["\' ]*:["\' ]+([^"\']+)',
+ r'<!--\s+SpaceID=(\d+)'
+ ], webpage, 'spaceid')
+
+ content_id = self._search_regex(
+ r'<script[^>]+class=["\']yvpub-player["\'][^>]+contentid=(?P<contentid>[^&"\']+)',
+ webpage, 'contentid', group='contentid')
+
+ json_data = self._download_json(
+ 'https://feapi-yvpub.yahooapis.jp/v1/content/%s' % content_id,
+ content_id,
+ query={
+ 'appid': 'dj0zaiZpPVZMTVFJR0FwZWpiMyZzPWNvbnN1bWVyc2VjcmV0Jng9YjU-',
+ 'output': 'json',
+ 'space_id': space_id,
+ 'domain': host,
+ 'ak': hashlib.md5('_'.join((space_id, host)).encode()).hexdigest(),
+ 'device_type': '1100',
+ })
+ formats = self._extract_formats(json_data, content_id)
+
+ return {
+ 'id': content_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
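
A minimal sketch of the 'ak' checksum YahooJapanNewsIE sends with the feapi-yvpub request above: the MD5 hex digest of '<space_id>_<host>'. The values below are placeholders, not taken from a real page:

import hashlib

space_id = '1234567'
host = 'news.yahoo.co.jp'
ak = hashlib.md5('_'.join((space_id, host)).encode()).hexdigest()
print(ak)  # 32-character hex digest passed as the 'ak' query parameter
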
diff --git a/hypervideo_dl/extractor/yandexdisk.py b/hypervideo_dl/extractor/yandexdisk.py
new file mode 100644
index 0000000..6fcd8ee
--- /dev/null
+++ b/hypervideo_dl/extractor/yandexdisk.py
@@ -0,0 +1,147 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ mimetype2ext,
+ try_get,
+ urljoin,
+)
+
+
+class YandexDiskIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://
+ (?P<domain>
+ yadi\.sk|
+ disk\.yandex\.
+ (?:
+ az|
+ by|
+ co(?:m(?:\.(?:am|ge|tr))?|\.il)|
+ ee|
+ fr|
+ k[gz]|
+ l[tv]|
+ md|
+ t[jm]|
+ u[az]|
+ ru
+ )
+ )/(?:[di]/|public.*?\bhash=)(?P<id>[^/?#&]+)'''
+
+ _TESTS = [{
+ 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
+ 'md5': 'a4a8d52958c8fddcf9845935070402ae',
+ 'info_dict': {
+ 'id': 'VdOeDou8eZs6Y',
+ 'ext': 'mp4',
+ 'title': '4.mp4',
+ 'duration': 168.6,
+ 'uploader': 'y.botova',
+ 'uploader_id': '300043621',
+ 'view_count': int,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }, {
+ 'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, video_id = re.match(self._VALID_URL, url).groups()
+
+ webpage = self._download_webpage(url, video_id)
+ store = self._parse_json(self._search_regex(
+ r'<script[^>]+id="store-prefetch"[^>]*>\s*({.+?})\s*</script>',
+ webpage, 'store'), video_id)
+ resource = store['resources'][store['rootResourceId']]
+
+ title = resource['name']
+ meta = resource.get('meta') or {}
+
+ public_url = meta.get('short_url')
+ if public_url:
+ video_id = self._match_id(public_url)
+
+ source_url = (self._download_json(
+ 'https://cloud-api.yandex.net/v1/disk/public/resources/download',
+ video_id, query={'public_key': url}, fatal=False) or {}).get('href')
+ video_streams = resource.get('videoStreams') or {}
+ video_hash = resource.get('hash') or url
+ environment = store.get('environment') or {}
+ sk = environment.get('sk')
+ yandexuid = environment.get('yandexuid')
+ if sk and yandexuid and not (source_url and video_streams):
+ self._set_cookie(domain, 'yandexuid', yandexuid)
+
+ def call_api(action):
+ return (self._download_json(
+ urljoin(url, '/public/api/') + action, video_id, data=json.dumps({
+ 'hash': video_hash,
+ 'sk': sk,
+ }).encode(), headers={
+ 'Content-Type': 'text/plain',
+ }, fatal=False) or {}).get('data') or {}
+ if not source_url:
+ # TODO: figure out how to detect if download limit has
+ # been reached and then avoid unnecessary source format
+ # extraction requests
+ source_url = call_api('download-url').get('url')
+ if not video_streams:
+ video_streams = call_api('get-video-streams')
+
+ formats = []
+ if source_url:
+ formats.append({
+ 'url': source_url,
+ 'format_id': 'source',
+ 'ext': determine_ext(title, meta.get('ext') or mimetype2ext(meta.get('mime_type')) or 'mp4'),
+ 'quality': 1,
+ 'filesize': int_or_none(meta.get('size'))
+ })
+
+ for video in (video_streams.get('videos') or []):
+ format_url = video.get('url')
+ if not format_url:
+ continue
+ if video.get('dimension') == 'adaptive':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ size = video.get('size') or {}
+ height = int_or_none(size.get('height'))
+ format_id = 'hls'
+ if height:
+ format_id += '-%dp' % height
+ formats.append({
+ 'ext': 'mp4',
+ 'format_id': format_id,
+ 'height': height,
+ 'protocol': 'm3u8_native',
+ 'url': format_url,
+ 'width': int_or_none(size.get('width')),
+ })
+ self._sort_formats(formats)
+
+ uid = resource.get('uid')
+ display_name = try_get(store, lambda x: x['users'][uid]['displayName'])
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'duration': float_or_none(video_streams.get('duration'), 1000),
+ 'uploader': display_name,
+ 'uploader_id': uid,
+ 'view_count': int_or_none(meta.get('views_counter')),
+ 'formats': formats,
+ }
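
A sketch of the extension fallback chain YandexDiskIE uses for its 'source' format above: prefer an extension found in the file name, then meta['ext'], then one derived from the MIME type, then 'mp4'. This is a plain-Python approximation of the determine_ext/mimetype2ext helpers, with made-up inputs:

import posixpath

def pick_ext(title, meta):
    ext = posixpath.splitext(title)[1].lstrip('.')
    if ext.isalnum():
        return ext
    mime_map = {'video/mp4': 'mp4', 'video/webm': 'webm'}
    return meta.get('ext') or mime_map.get(meta.get('mime_type', '')) or 'mp4'

assert pick_ext('4.mp4', {}) == 'mp4'
assert pick_ext('holiday video', {'mime_type': 'video/webm'}) == 'webm'
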
diff --git a/hypervideo_dl/extractor/yandexmusic.py b/hypervideo_dl/extractor/yandexmusic.py
new file mode 100644
index 0000000..0b86c71
--- /dev/null
+++ b/hypervideo_dl/extractor/yandexmusic.py
@@ -0,0 +1,459 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ try_get,
+)
+
+
+class YandexMusicBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by|com)'
+
+ @staticmethod
+ def _handle_error(response):
+ if isinstance(response, dict):
+ error = response.get('error')
+ if error:
+ raise ExtractorError(error, expected=True)
+ if response.get('type') == 'captcha' or 'captcha' in response:
+ YandexMusicBaseIE._raise_captcha()
+
+ @staticmethod
+ def _raise_captcha():
+ raise ExtractorError(
+ 'YandexMusic has considered hypervideo requests automated and '
+ 'asks you to solve a CAPTCHA. You can either wait for some '
+ 'time until unblocked and optionally use --sleep-interval '
+ 'in future or alternatively you can go to https://music.yandex.ru/ '
+ 'solve CAPTCHA, then export cookies and pass cookie file to '
+ 'hypervideo with --cookies',
+ expected=True)
+
+ def _download_webpage_handle(self, *args, **kwargs):
+ webpage = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
+ if 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage:
+ self._raise_captcha()
+ return webpage
+
+ def _download_json(self, *args, **kwargs):
+ response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
+ self._handle_error(response)
+ return response
+
+ def _call_api(self, ep, tld, url, item_id, note, query):
+ return self._download_json(
+ 'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep),
+ item_id, note,
+ fatal=False,
+ headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'X-Retpath-Y': url,
+ },
+ query=query)
+
+
+class YandexMusicTrackIE(YandexMusicBaseIE):
+ IE_NAME = 'yandexmusic:track'
+ IE_DESC = 'Яндекс.Музыка - Трек'
+ _VALID_URL = r'%s/album/(?P<album_id>\d+)/track/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
+
+ _TESTS = [{
+ 'url': 'http://music.yandex.ru/album/540508/track/4878838',
+ 'md5': 'dec8b661f12027ceaba33318787fff76',
+ 'info_dict': {
+ 'id': '4878838',
+ 'ext': 'mp3',
+ 'title': 'md5:c63e19341fdbe84e43425a30bc777856',
+ 'filesize': int,
+ 'duration': 193.04,
+ 'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
+ 'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
+ 'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
+ 'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
+ 'release_year': 2009,
+ },
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }, {
+ # multiple disks
+ 'url': 'http://music.yandex.ru/album/3840501/track/705105',
+ 'md5': '82a54e9e787301dd45aba093cf6e58c0',
+ 'info_dict': {
+ 'id': '705105',
+ 'ext': 'mp3',
+ 'title': 'md5:f86d4a9188279860a83000277024c1a6',
+ 'filesize': int,
+ 'duration': 239.27,
+ 'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
+ 'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
+ 'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
+ 'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
+ 'release_year': 2016,
+ 'genre': 'pop',
+ 'disc_number': 2,
+ 'track_number': 9,
+ },
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }, {
+ 'url': 'http://music.yandex.com/album/540508/track/4878838',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')
+
+ track = self._call_api(
+ 'track', tld, url, track_id, 'Downloading track JSON',
+ {'track': '%s:%s' % (track_id, album_id)})['track']
+ track_title = track['title']
+
+ download_data = self._download_json(
+ 'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
+ track_id, 'Downloading track location url JSON',
+ headers={'X-Retpath-Y': url})
+
+ fd_data = self._download_json(
+ download_data['src'], track_id,
+ 'Downloading track location JSON',
+ query={'format': 'json'})
+ key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
+        f_url = 'http://%s/get-mp3/%s/%s?track-id=%s' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])
+
+ thumbnail = None
+ cover_uri = track.get('albums', [{}])[0].get('coverUri')
+ if cover_uri:
+ thumbnail = cover_uri.replace('%%', 'orig')
+ if not thumbnail.startswith('http'):
+ thumbnail = 'http://' + thumbnail
+
+ track_info = {
+ 'id': track_id,
+ 'ext': 'mp3',
+ 'url': f_url,
+ 'filesize': int_or_none(track.get('fileSize')),
+ 'duration': float_or_none(track.get('durationMs'), 1000),
+ 'thumbnail': thumbnail,
+ 'track': track_title,
+ 'acodec': download_data.get('codec'),
+ 'abr': int_or_none(download_data.get('bitrate')),
+ }
+
+ def extract_artist_name(artist):
+ decomposed = artist.get('decomposed')
+ if not isinstance(decomposed, list):
+ return artist['name']
+ parts = [artist['name']]
+ for element in decomposed:
+ if isinstance(element, dict) and element.get('name'):
+ parts.append(element['name'])
+ elif isinstance(element, compat_str):
+ parts.append(element)
+ return ''.join(parts)
+
+ def extract_artist(artist_list):
+ if artist_list and isinstance(artist_list, list):
+ artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')]
+ if artists_names:
+ return ', '.join(artists_names)
+
+ albums = track.get('albums')
+ if albums and isinstance(albums, list):
+ album = albums[0]
+ if isinstance(album, dict):
+ year = album.get('year')
+ disc_number = int_or_none(try_get(
+ album, lambda x: x['trackPosition']['volume']))
+ track_number = int_or_none(try_get(
+ album, lambda x: x['trackPosition']['index']))
+ track_info.update({
+ 'album': album.get('title'),
+ 'album_artist': extract_artist(album.get('artists')),
+ 'release_year': int_or_none(year),
+ 'genre': album.get('genre'),
+ 'disc_number': disc_number,
+ 'track_number': track_number,
+ })
+
+ track_artist = extract_artist(track.get('artists'))
+ if track_artist:
+ track_info.update({
+ 'artist': track_artist,
+ 'title': '%s - %s' % (track_artist, track_title),
+ })
+ else:
+ track_info['title'] = track_title
+
+ return track_info
+
+
+class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
+ def _extract_tracks(self, source, item_id, url, tld):
+ tracks = source['tracks']
+ track_ids = [compat_str(track_id) for track_id in source['trackIds']]
+
+        # The tracks dictionary shipped with the playlist.jsx API is limited
+        # to 150 tracks, so any missing tracks have to be fetched manually.
+ if len(tracks) < len(track_ids):
+ present_track_ids = set([
+ compat_str(track['id'])
+ for track in tracks if track.get('id')])
+ missing_track_ids = [
+ track_id for track_id in track_ids
+ if track_id not in present_track_ids]
+ # Request missing tracks in chunks to avoid exceeding max HTTP header size,
+ # see https://github.com/ytdl-org/youtube-dl/issues/27355
+ _TRACKS_PER_CHUNK = 250
+ for chunk_num in itertools.count(0):
+ start = chunk_num * _TRACKS_PER_CHUNK
+ end = start + _TRACKS_PER_CHUNK
+ missing_track_ids_req = missing_track_ids[start:end]
+ assert missing_track_ids_req
+ missing_tracks = self._call_api(
+ 'track-entries', tld, url, item_id,
+ 'Downloading missing tracks JSON chunk %d' % (chunk_num + 1), {
+ 'entries': ','.join(missing_track_ids_req),
+ 'lang': tld,
+ 'external-domain': 'music.yandex.%s' % tld,
+ 'overembed': 'false',
+ 'strict': 'true',
+ })
+ if missing_tracks:
+ tracks.extend(missing_tracks)
+ if end >= len(missing_track_ids):
+ break
+
+ return tracks
+
+ def _build_playlist(self, tracks):
+ entries = []
+ for track in tracks:
+ track_id = track.get('id') or track.get('realId')
+ if not track_id:
+ continue
+ albums = track.get('albums')
+ if not albums or not isinstance(albums, list):
+ continue
+ album = albums[0]
+ if not isinstance(album, dict):
+ continue
+ album_id = album.get('id')
+ if not album_id:
+ continue
+ entries.append(self.url_result(
+ 'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id),
+ ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
+ return entries
+
+
+class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
+ IE_NAME = 'yandexmusic:album'
+ IE_DESC = 'Яндекс.Музыка - Альбом'
+ _VALID_URL = r'%s/album/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
+
+ _TESTS = [{
+ 'url': 'http://music.yandex.ru/album/540508',
+ 'info_dict': {
+ 'id': '540508',
+ 'title': 'md5:7ed1c3567f28d14be9f61179116f5571',
+ },
+ 'playlist_count': 50,
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }, {
+ 'url': 'https://music.yandex.ru/album/3840501',
+ 'info_dict': {
+ 'id': '3840501',
+ 'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f',
+ },
+ 'playlist_count': 33,
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }, {
+ # empty artists
+ 'url': 'https://music.yandex.ru/album/9091882',
+ 'info_dict': {
+ 'id': '9091882',
+ 'title': 'ТЕД на русском',
+ },
+ 'playlist_count': 187,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if YandexMusicTrackIE.suitable(url) else super(YandexMusicAlbumIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ tld = mobj.group('tld')
+ album_id = mobj.group('id')
+
+ album = self._call_api(
+ 'album', tld, url, album_id, 'Downloading album JSON',
+ {'album': album_id})
+
+ entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
+
+ title = album['title']
+ artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str)
+ if artist:
+ title = '%s - %s' % (artist, title)
+ year = album.get('year')
+ if year:
+ title += ' (%s)' % year
+
+ return self.playlist_result(entries, compat_str(album['id']), title)
+
+
+class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
+ IE_NAME = 'yandexmusic:playlist'
+ IE_DESC = 'Яндекс.Музыка - Плейлист'
+ _VALID_URL = r'%s/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
+
+ _TESTS = [{
+ 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
+ 'info_dict': {
+ 'id': '1245',
+ 'title': 'md5:841559b3fe2b998eca88d0d2e22a3097',
+ 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
+ },
+ 'playlist_count': 5,
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }, {
+ 'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
+ 'only_matching': True,
+ }, {
+ # playlist exceeding the limit of 150 tracks (see
+ # https://github.com/ytdl-org/youtube-dl/issues/6666)
+ 'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364',
+ 'info_dict': {
+ 'id': '1364',
+ 'title': 'md5:b3b400f997d3f878a13ae0699653f7db',
+ },
+ 'playlist_mincount': 437,
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ tld = mobj.group('tld')
+ user = mobj.group('user')
+ playlist_id = mobj.group('id')
+
+ playlist = self._call_api(
+ 'playlist', tld, url, playlist_id, 'Downloading playlist JSON', {
+ 'owner': user,
+ 'kinds': playlist_id,
+ 'light': 'true',
+ 'lang': tld,
+ 'external-domain': 'music.yandex.%s' % tld,
+ 'overembed': 'false',
+ })['playlist']
+
+ tracks = self._extract_tracks(playlist, playlist_id, url, tld)
+
+ return self.playlist_result(
+ self._build_playlist(tracks),
+ compat_str(playlist_id),
+ playlist.get('title'), playlist.get('description'))
+
+
+class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
+ def _call_artist(self, tld, url, artist_id):
+ return self._call_api(
+ 'artist', tld, url, artist_id,
+ 'Downloading artist %s JSON' % self._ARTIST_WHAT, {
+ 'artist': artist_id,
+ 'what': self._ARTIST_WHAT,
+ 'sort': self._ARTIST_SORT or '',
+ 'dir': '',
+ 'period': '',
+ 'lang': tld,
+ 'external-domain': 'music.yandex.%s' % tld,
+ 'overembed': 'false',
+ })
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ tld = mobj.group('tld')
+ artist_id = mobj.group('id')
+ data = self._call_artist(tld, url, artist_id)
+ tracks = self._extract_tracks(data, artist_id, url, tld)
+ title = try_get(data, lambda x: x['artist']['name'], compat_str)
+ return self.playlist_result(
+ self._build_playlist(tracks), artist_id, title)
+
+
+class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
+ IE_NAME = 'yandexmusic:artist:tracks'
+ IE_DESC = 'Яндекс.Музыка - Артист - Треки'
+ _VALID_URL = r'%s/artist/(?P<id>\d+)/tracks' % YandexMusicBaseIE._VALID_URL_BASE
+
+ _TESTS = [{
+ 'url': 'https://music.yandex.ru/artist/617526/tracks',
+ 'info_dict': {
+ 'id': '617526',
+ 'title': 'md5:131aef29d45fd5a965ca613e708c040b',
+ },
+ 'playlist_count': 507,
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }]
+
+ _ARTIST_SORT = ''
+ _ARTIST_WHAT = 'tracks'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ tld = mobj.group('tld')
+ artist_id = mobj.group('id')
+ data = self._call_artist(tld, url, artist_id)
+ tracks = self._extract_tracks(data, artist_id, url, tld)
+ artist = try_get(data, lambda x: x['artist']['name'], compat_str)
+ title = '%s - %s' % (artist or artist_id, 'Треки')
+ return self.playlist_result(
+ self._build_playlist(tracks), artist_id, title)
+
+
+class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
+ IE_NAME = 'yandexmusic:artist:albums'
+ IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
+ _VALID_URL = r'%s/artist/(?P<id>\d+)/albums' % YandexMusicBaseIE._VALID_URL_BASE
+
+ _TESTS = [{
+ 'url': 'https://music.yandex.ru/artist/617526/albums',
+ 'info_dict': {
+ 'id': '617526',
+ 'title': 'md5:55dc58d5c85699b7fb41ee926700236c',
+ },
+ 'playlist_count': 8,
+ # 'skip': 'Travis CI servers blocked by YandexMusic',
+ }]
+
+ _ARTIST_SORT = 'year'
+ _ARTIST_WHAT = 'albums'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ tld = mobj.group('tld')
+ artist_id = mobj.group('id')
+ data = self._call_artist(tld, url, artist_id)
+ entries = []
+ for album in data['albums']:
+ if not isinstance(album, dict):
+ continue
+ album_id = album.get('id')
+ if not album_id:
+ continue
+ entries.append(self.url_result(
+ 'http://music.yandex.ru/album/%s' % album_id,
+ ie=YandexMusicAlbumIE.ie_key(), video_id=album_id))
+ artist = try_get(data, lambda x: x['artist']['name'], compat_str)
+ title = '%s - %s' % (artist or artist_id, 'Альбомы')
+ return self.playlist_result(entries, artist_id, title)
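
A standalone rendering of the download-URL signing in YandexMusicTrackIE above: the key is the MD5 of a fixed salt, the storage path without its leading slash, and the 's' token from the location JSON. fd_data here is a made-up stand-in for the real response:

import hashlib

fd_data = {
    'host': 'storage.example.net',
    'path': '/abcdef/12345.mp3',
    's': 'deadbeef',
    'ts': '0005b3c0',
}
key = hashlib.md5(
    ('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
f_url = 'http://%s/get-mp3/%s/%s?track-id=%s' % (
    fd_data['host'], key, fd_data['ts'] + fd_data['path'], '4878838')
print(f_url)
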
diff --git a/hypervideo_dl/extractor/yandexvideo.py b/hypervideo_dl/extractor/yandexvideo.py
new file mode 100644
index 0000000..6a166ec
--- /dev/null
+++ b/hypervideo_dl/extractor/yandexvideo.py
@@ -0,0 +1,144 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ try_get,
+ url_or_none,
+)
+
+
+class YandexVideoIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ yandex\.ru(?:/(?:portal/(?:video|efir)|efir))?/?\?.*?stream_id=|
+ frontend\.vh\.yandex\.ru/player/
+ )
+ (?P<id>(?:[\da-f]{32}|[\w-]{12}))
+ '''
+ _TESTS = [{
+ 'url': 'https://yandex.ru/portal/video?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374',
+ 'md5': 'e02a05bfaf0d9615ef07ae3a10f4faf4',
+ 'info_dict': {
+ 'id': '4dbb36ec4e0526d58f9f2dc8f0ecf374',
+ 'ext': 'mp4',
+ 'title': 'Русский Вудсток - главный рок-фест в истории СССР / вДудь',
+ 'description': 'md5:7d6b8d4bc4a3b9a56499916c1ea5b5fa',
+ 'thumbnail': r're:^https?://',
+ 'timestamp': 1549972939,
+ 'duration': 5575,
+ 'age_limit': 18,
+ 'upload_date': '20190212',
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ }, {
+ 'url': 'https://yandex.ru/portal/efir?stream_id=4dbb262b4fe5cf15a215de4f34eee34d&from=morda',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://yandex.ru/?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://frontend.vh.yandex.ru/player/4dbb262b4fe5cf15a215de4f34eee34d?from=morda',
+ 'only_matching': True,
+ }, {
+ # vod-episode, series episode
+ 'url': 'https://yandex.ru/portal/video?stream_id=45b11db6e4b68797919c93751a938cee',
+ 'only_matching': True,
+ }, {
+ # episode, sports
+ 'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
+ 'only_matching': True,
+ }, {
+ # DASH with DRM
+ 'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://yandex.ru/efir?stream_active=watching&stream_id=v7a2dZ-v5mSI&from_block=efir_newtab',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ player = try_get((self._download_json(
+ 'https://frontend.vh.yandex.ru/graphql', video_id, data=('''{
+ player(content_id: "%s") {
+ computed_title
+ content_url
+ description
+ dislikes
+ duration
+ likes
+ program_title
+ release_date
+ release_date_ut
+ release_year
+ restriction_age
+ season
+ start_time
+ streams
+ thumbnail
+ title
+ views_count
+ }
+}''' % video_id).encode(), fatal=False)), lambda x: x['player']['content'])
+ if not player or player.get('error'):
+ player = self._download_json(
+ 'https://frontend.vh.yandex.ru/v23/player/%s.json' % video_id,
+ video_id, query={
+ 'stream_options': 'hires',
+ 'disable_trackings': 1,
+ })
+ content = player['content']
+
+ title = content.get('title') or content['computed_title']
+
+ formats = []
+ streams = content.get('streams') or []
+ streams.append({'url': content.get('content_url')})
+ for stream in streams:
+ content_url = url_or_none(stream.get('url'))
+ if not content_url:
+ continue
+ ext = determine_ext(content_url)
+ if ext == 'ismc':
+ continue
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ content_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ content_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({'url': content_url})
+
+ self._sort_formats(formats)
+
+ timestamp = (int_or_none(content.get('release_date'))
+ or int_or_none(content.get('release_date_ut'))
+ or int_or_none(content.get('start_time')))
+ season = content.get('season') or {}
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': content.get('description'),
+ 'thumbnail': content.get('thumbnail'),
+ 'timestamp': timestamp,
+ 'duration': int_or_none(content.get('duration')),
+ 'series': content.get('program_title'),
+ 'age_limit': int_or_none(content.get('restriction_age')),
+ 'view_count': int_or_none(content.get('views_count')),
+ 'like_count': int_or_none(content.get('likes')),
+ 'dislike_count': int_or_none(content.get('dislikes')),
+ 'season_number': int_or_none(season.get('season_number')),
+ 'season_id': season.get('id'),
+ 'release_year': int_or_none(content.get('release_year')),
+ 'formats': formats,
+ }
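
A sketch of the stream dispatch in YandexVideoIE above: each stream URL is routed on its extension -- 'ismc' (Smooth Streaming) manifests are skipped, m3u8/mpd manifests are expanded into HLS/DASH formats, and anything else is taken as a direct link. The URLs are made up:

import posixpath

def route(url):
    ext = posixpath.splitext(url.split('?')[0])[1].lstrip('.')
    if ext == 'ismc':
        return None  # skipped above
    if ext in ('m3u8', 'mpd'):
        return 'expand-' + ext  # handed to the HLS/DASH format extractors
    return 'direct'

assert route('https://cdn.example.com/master.m3u8?x=1') == 'expand-m3u8'
assert route('https://cdn.example.com/video.mp4') == 'direct'
assert route('https://cdn.example.com/manifest.ismc') is None
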
diff --git a/hypervideo_dl/extractor/yapfiles.py b/hypervideo_dl/extractor/yapfiles.py
new file mode 100644
index 0000000..cfb368d
--- /dev/null
+++ b/hypervideo_dl/extractor/yapfiles.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ qualities,
+ unescapeHTML,
+ url_or_none,
+)
+
+
+class YapFilesIE(InfoExtractor):
+ _YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
+ _VALID_URL = r'https?:%s' % _YAPFILES_URL
+ _TESTS = [{
+ # with hd
+ 'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
+ 'md5': '2db19e2bfa2450568868548a1aa1956c',
+ 'info_dict': {
+ 'id': 'vMDE1NjcyNDUt0413',
+ 'ext': 'mp4',
+ 'title': 'Самый худший пароль WIFI',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 72,
+ },
+ }, {
+ # without hd
+ 'url': 'https://api.yapfiles.ru/get_player/?uid=video_player_1872528&plroll=1&adv=1&v=vMDE4NzI1Mjgt690b',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
+ % YapFilesIE._YAPFILES_URL, webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id, fatal=False)
+
+ player_url = None
+ query = {}
+ if webpage:
+ player_url = self._search_regex(
+ r'player\.init\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'player url', default=None, group='url')
+
+ if not player_url:
+ player_url = 'http://api.yapfiles.ru/load/%s/' % video_id
+ query = {
+ 'md5': 'ded5f369be61b8ae5f88e2eeb2f3caff',
+ 'type': 'json',
+ 'ref': url,
+ }
+
+ player = self._download_json(
+ player_url, video_id, query=query)['player']
+
+ playlist_url = player['playlist']
+ title = player['title']
+ thumbnail = player.get('poster')
+
+        # 'Ролик удален' is the placeholder title of deleted videos ('Video removed')
+        if title == 'Ролик удален' or 'deleted.jpg' in (thumbnail or ''):
+ raise ExtractorError(
+ 'Video %s has been removed' % video_id, expected=True)
+
+ playlist = self._download_json(
+ playlist_url, video_id)['player']['main']
+
+ hd_height = int_or_none(player.get('hd'))
+
+ QUALITIES = ('sd', 'hd')
+ quality_key = qualities(QUALITIES)
+ formats = []
+ for format_id in QUALITIES:
+ is_hd = format_id == 'hd'
+ format_url = url_or_none(playlist.get(
+ 'file%s' % ('_hd' if is_hd else '')))
+ if not format_url:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'quality': quality_key(format_id),
+ 'height': hd_height if is_hd else None,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': int_or_none(player.get('length')),
+ 'formats': formats,
+ }
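
A plain-Python equivalent of the qualities() helper YapFilesIE relies on above (shown for illustration, not the hypervideo implementation itself): it maps a preference-ordered tuple of format ids to sortable integers, so 'hd' outranks 'sd':

QUALITIES = ('sd', 'hd')

def quality_key(format_id):
    return QUALITIES.index(format_id) if format_id in QUALITIES else -1

assert quality_key('hd') > quality_key('sd') > quality_key('unknown')
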
diff --git a/hypervideo_dl/extractor/yesjapan.py b/hypervideo_dl/extractor/yesjapan.py
new file mode 100644
index 0000000..681338c
--- /dev/null
+++ b/hypervideo_dl/extractor/yesjapan.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ HEADRequest,
+ get_element_by_attribute,
+ parse_iso8601,
+)
+
+
+class YesJapanIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?yesjapan\.com/video/(?P<slug>[A-Za-z0-9\-]*)_(?P<id>[A-Za-z0-9]+)\.html'
+ _TEST = {
+ 'url': 'http://www.yesjapan.com/video/japanese-in-5-20-wa-and-ga-particle-usages_726497834.html',
+ 'md5': 'f0be416314e5be21a12b499b330c21cf',
+ 'info_dict': {
+ 'id': '726497834',
+ 'title': 'Japanese in 5! #20 - WA And GA Particle Usages',
+ 'description': 'This should clear up some issues most students of Japanese encounter with WA and GA....',
+ 'ext': 'mp4',
+ 'timestamp': 1416391590,
+ 'upload_date': '20141119',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(webpage)
+ video_url = self._og_search_video_url(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ timestamp = None
+ submit_info = get_element_by_attribute('class', 'pm-submit-data', webpage)
+ if submit_info:
+ timestamp = parse_iso8601(self._search_regex(
+ r'datetime="([^"]+)"', submit_info, 'upload date', fatal=False, default=None))
+
+ # attempt to resolve the final URL in order to get a proper extension
+ redirect_req = HEADRequest(video_url)
+ req = self._request_webpage(
+ redirect_req, video_id, note='Resolving final URL', errnote='Could not resolve final URL', fatal=False)
+ if req:
+ video_url = req.geturl()
+
+ formats = [{
+ 'format_id': 'sd',
+ 'url': video_url,
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'thumbnail': thumbnail,
+ }
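
A sketch of the "resolve final URL" step YesJapanIE performs above, using only the Python 3 standard library: issue a HEAD request and read the post-redirect URL. The target URL is a placeholder, not a real yesjapan media link:

import urllib.request

req = urllib.request.Request(
    'https://example.com/video.php?id=726497834', method='HEAD')
try:
    with urllib.request.urlopen(req) as resp:
        final_url = resp.geturl()  # URL after any redirects were followed
except OSError:
    final_url = req.full_url  # keep the original URL, as the extractor does
print(final_url)
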
diff --git a/hypervideo_dl/extractor/yinyuetai.py b/hypervideo_dl/extractor/yinyuetai.py
new file mode 100644
index 0000000..1fd8d35
--- /dev/null
+++ b/hypervideo_dl/extractor/yinyuetai.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class YinYueTaiIE(InfoExtractor):
+ IE_NAME = 'yinyuetai:video'
+ IE_DESC = '音悦Tai'
+ _VALID_URL = r'https?://v\.yinyuetai\.com/video(?:/h5)?/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://v.yinyuetai.com/video/2322376',
+ 'md5': '6e3abe28d38e3a54b591f9f040595ce0',
+ 'info_dict': {
+ 'id': '2322376',
+ 'ext': 'mp4',
+ 'title': '少女时代_PARTY_Music Video Teaser',
+ 'creator': '少女时代',
+ 'duration': 25,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://v.yinyuetai.com/video/h5/2322376',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = self._download_json(
+ 'http://ext.yinyuetai.com/main/get-h-mv-info?json=true&videoId=%s' % video_id, video_id,
+ 'Downloading mv info')['videoInfo']['coreVideoInfo']
+
+ if info['error']:
+ raise ExtractorError(info['errorMsg'], expected=True)
+
+ formats = [{
+ 'url': format_info['videoUrl'],
+ 'format_id': format_info['qualityLevel'],
+ 'format': format_info.get('qualityLevelName'),
+ 'filesize': format_info.get('fileSize'),
+            # though the URLs end with .flv, the downloaded files are in fact mp4
+ 'ext': 'mp4',
+ 'tbr': format_info.get('bitrate'),
+ } for format_info in info['videoUrlModels']]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': info['videoName'],
+ 'thumbnail': info.get('bigHeadImage'),
+ 'creator': info.get('artistNames'),
+ 'duration': info.get('duration'),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/ynet.py b/hypervideo_dl/extractor/ynet.py
new file mode 100644
index 0000000..c4ae4d8
--- /dev/null
+++ b/hypervideo_dl/extractor/ynet.py
@@ -0,0 +1,52 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote_plus
+
+
+class YnetIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html'
+ _TESTS = [
+ {
+ 'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html',
+ 'info_dict': {
+ 'id': 'L-11659-99244',
+ 'ext': 'flv',
+ 'title': 'איש לא יודע מאיפה באנו',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ }
+ }, {
+ 'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html',
+ 'info_dict': {
+ 'id': 'L-8859-84418',
+ 'ext': 'flv',
+ 'title': "צפו: הנשיקה הלוהטת של תורגי' ויוליה פלוטקין",
+ 'thumbnail': r're:^https?://.*\.jpg',
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ content = compat_urllib_parse_unquote_plus(self._og_search_video_url(webpage))
+ config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config'))
+ f4m_url = config['clip']['url']
+ title = self._og_search_title(webpage)
+ m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title)
+ if m:
+ title = m.group('title')
+ formats = self._extract_f4m_formats(f4m_url, video_id)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
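
A sketch of the config extraction in YnetIE above: the og:video URL carries a percent-encoded 'config=' JSON blob at its end, which is unquoted and parsed. The input is a hand-made example, not a captured ynet URL:

import json
import re
from urllib.parse import unquote_plus

og_video = ('http://example.com/player.swf?config='
            '%7B%22clip%22%3A%20%7B%22url%22%3A%20%22http%3A//example.com/clip.f4m%22%7D%7D')
content = unquote_plus(og_video)
config = json.loads(re.search(r'config=({.+?})$', content).group(1))
assert config['clip']['url'].endswith('.f4m')
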
diff --git a/hypervideo_dl/extractor/youjizz.py b/hypervideo_dl/extractor/youjizz.py
new file mode 100644
index 0000000..88aabd2
--- /dev/null
+++ b/hypervideo_dl/extractor/youjizz.py
@@ -0,0 +1,95 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_duration,
+ url_or_none,
+)
+
+
+class YouJizzIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]*-(?P<id>\d+)\.html|embed/(?P<embed_id>\d+))'
+ _TESTS = [{
+ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
+ 'md5': 'b1e1dfaa8bb9537d8b84eeda9cf4acf4',
+ 'info_dict': {
+ 'id': '2189178',
+ 'ext': 'mp4',
+ 'title': 'Zeichentrick 1',
+ 'age_limit': 18,
+ 'duration': 2874,
+ }
+ }, {
+ 'url': 'http://www.youjizz.com/videos/-2189178.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youjizz.com/videos/embed/31991001',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('embed_id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<title>(.+?)</title>', webpage, 'title')
+
+ formats = []
+
+ encodings = self._parse_json(
+ self._search_regex(
+ r'[Ee]ncodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
+ default='[]'),
+ video_id, fatal=False)
+ for encoding in encodings:
+ if not isinstance(encoding, dict):
+ continue
+ format_url = url_or_none(encoding.get('filename'))
+ if not format_url:
+ continue
+ if determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ format_id = encoding.get('name') or encoding.get('quality')
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]', format_id, 'height', default=None))
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'height': height,
+ })
+
+ if formats:
+ info_dict = {
+ 'formats': formats,
+ }
+ else:
+ # YouJizz's HTML5 player has invalid HTML
+ webpage = webpage.replace('"controls', '" controls')
+ info_dict = self._parse_html5_media_entries(
+ url, webpage, video_id)[0]
+
+ duration = parse_duration(self._search_regex(
+ r'<strong>Runtime:</strong>([^<]+)', webpage, 'duration',
+ default=None))
+ uploader = self._search_regex(
+ r'<strong>Uploaded By:.*?<a[^>]*>([^<]+)', webpage, 'uploader',
+ default=None)
+
+ info_dict.update({
+ 'id': video_id,
+ 'title': title,
+ 'age_limit': self._rta_search(webpage),
+ 'duration': duration,
+ 'uploader': uploader,
+ })
+
+ return info_dict
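
A sketch of the height parse in YouJizzIE above: a leading '<digits>p' in the encoding name becomes the format height, anything else yields None. The format ids are illustrative:

import re

def parse_height(format_id):
    m = re.search(r'^(\d+)[pP]', format_id or '')
    return int(m.group(1)) if m else None

assert parse_height('720p') == 720
assert parse_height('1080P') == 1080
assert parse_height('auto') is None
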
diff --git a/hypervideo_dl/extractor/youku.py b/hypervideo_dl/extractor/youku.py
new file mode 100644
index 0000000..880c896
--- /dev/null
+++ b/hypervideo_dl/extractor/youku.py
@@ -0,0 +1,309 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import re
+import string
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ get_element_by_class,
+    int_or_none,
+    js_to_json,
+ str_or_none,
+ strip_jsonp,
+)
+
+
+class YoukuIE(InfoExtractor):
+ IE_NAME = 'youku'
+ IE_DESC = '优酷'
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://(
+ (?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
+ video\.tudou\.com/v/)|
+ youku:)
+ (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
+ '''
+
+ _TESTS = [{
+ # MD5 is unstable
+ 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
+ 'info_dict': {
+ 'id': 'XMTc1ODE5Njcy',
+ 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
+ 'ext': 'mp4',
+ 'duration': 74.73,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': '。躲猫猫、',
+ 'uploader_id': '36017967',
+ 'uploader_url': 'http://i.youku.com/u/UMTQ0MDcxODY4',
+ 'tags': list,
+ }
+ }, {
+ 'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
+ 'info_dict': {
+ 'id': 'XODgxNjg1Mzk2',
+ 'ext': 'mp4',
+ 'title': '武媚娘传奇 85',
+ 'duration': 1999.61,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': '疯狂豆花',
+ 'uploader_id': '62583473',
+ 'uploader_url': 'http://i.youku.com/u/UMjUwMzMzODky',
+ 'tags': list,
+ },
+ }, {
+ 'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
+ 'info_dict': {
+ 'id': 'XMTI1OTczNDM5Mg',
+ 'ext': 'mp4',
+ 'title': '花千骨 04',
+ 'duration': 2363,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': '放剧场-花千骨',
+ 'uploader_id': '772849359',
+ 'uploader_url': 'http://i.youku.com/u/UMzA5MTM5NzQzNg==',
+ 'tags': list,
+ },
+ }, {
+ 'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
+ 'note': 'Video protected with password',
+ 'info_dict': {
+ 'id': 'XNjA1NzA2Njgw',
+ 'ext': 'mp4',
+ 'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
+ 'duration': 7264.5,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': 'FoxJin1006',
+ 'uploader_id': '322014285',
+ 'uploader_url': 'http://i.youku.com/u/UMTI4ODA1NzE0MA==',
+ 'tags': list,
+ },
+ 'params': {
+ 'videopassword': '100600',
+ },
+ }, {
+ # /play/get.json contains streams with "channel_type":"tail"
+ 'url': 'http://v.youku.com/v_show/id_XOTUxMzg4NDMy.html',
+ 'info_dict': {
+ 'id': 'XOTUxMzg4NDMy',
+ 'ext': 'mp4',
+ 'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft',
+ 'duration': 702.08,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': '明月庄主moon',
+ 'uploader_id': '38465621',
+ 'uploader_url': 'http://i.youku.com/u/UMTUzODYyNDg0',
+ 'tags': list,
+ },
+ }, {
+ 'url': 'http://video.tudou.com/v/XMjIyNzAzMTQ4NA==.html?f=46177805',
+ 'info_dict': {
+ 'id': 'XMjIyNzAzMTQ4NA',
+ 'ext': 'mp4',
+ 'title': '卡马乔国足开大脚长传冲吊集锦',
+ 'duration': 289,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': '阿卜杜拉之星',
+ 'uploader_id': '2382249',
+ 'uploader_url': 'http://i.youku.com/u/UOTUyODk5Ng==',
+ 'tags': list,
+ },
+ }, {
+ 'url': 'http://video.tudou.com/v/XMjE4ODI3OTg2MA==.html',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def get_ysuid():
+ return '%d%s' % (int(time.time()), ''.join([
+            random.choice(string.ascii_letters) for _ in range(3)]))
+
+ def get_format_name(self, fm):
+ _dict = {
+ '3gp': 'h6',
+ '3gphd': 'h5',
+ 'flv': 'h4',
+ 'flvhd': 'h4',
+ 'mp4': 'h3',
+ 'mp4hd': 'h3',
+ 'mp4hd2': 'h4',
+ 'mp4hd3': 'h4',
+ 'hd2': 'h2',
+ 'hd3': 'h1',
+ }
+ return _dict.get(fm)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ self._set_cookie('youku.com', '__ysuid', self.get_ysuid())
+ self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
+
+ _, urlh = self._download_webpage_handle(
+ 'https://log.mmstat.com/eg.js', video_id, 'Retrieving cna info')
+ # The etag header is '"foobar"'; let's remove the double quotes
+ cna = urlh.headers['etag'][1:-1]
+
+ # request basic data
+ basic_data_params = {
+ 'vid': video_id,
+ 'ccode': '0532',
+ 'client_ip': '192.168.1.1',
+ 'utid': cna,
+ 'client_ts': time.time() / 1000,
+ }
+
+ video_password = self._downloader.params.get('videopassword')
+ if video_password:
+ basic_data_params['password'] = video_password
+
+ headers = {
+ 'Referer': url,
+ }
+ headers.update(self.geo_verification_headers())
+ data = self._download_json(
+ 'https://ups.youku.com/ups/get.json', video_id,
+ 'Downloading JSON metadata',
+ query=basic_data_params, headers=headers)['data']
+
+ error = data.get('error')
+ if error:
+ error_note = error.get('note')
+ if error_note is not None and '因版权原因无法观看此视频' in error_note:
+ raise ExtractorError(
+ 'Youku said: Sorry, this video is available in China only', expected=True)
+ elif error_note and '该视频被设为私密' in error_note:
+ raise ExtractorError(
+ 'Youku said: Sorry, this video is private', expected=True)
+ else:
+ msg = 'Youku server reported error %i' % error.get('code')
+ if error_note is not None:
+ msg += ': ' + error_note
+ raise ExtractorError(msg)
+
+ # get video title
+ video_data = data['video']
+ title = video_data['title']
+
+ formats = [{
+ 'url': stream['m3u8_url'],
+ 'format_id': self.get_format_name(stream.get('stream_type')),
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+            'filesize': int_or_none(stream.get('size')),
+ 'width': stream.get('width'),
+ 'height': stream.get('height'),
+ } for stream in data['stream'] if stream.get('channel_type') != 'tail']
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'duration': video_data.get('seconds'),
+ 'thumbnail': video_data.get('logo'),
+ 'uploader': video_data.get('username'),
+ 'uploader_id': str_or_none(video_data.get('userid')),
+ 'uploader_url': data.get('uploader', {}).get('homepage'),
+ 'tags': video_data.get('tags'),
+ }
+
+
+class YoukuShowIE(InfoExtractor):
+ _VALID_URL = r'https?://list\.youku\.com/show/id_(?P<id>[0-9a-z]+)\.html'
+ IE_NAME = 'youku:show'
+
+ _TESTS = [{
+ 'url': 'http://list.youku.com/show/id_zc7c670be07ff11e48b3f.html',
+ 'info_dict': {
+ 'id': 'zc7c670be07ff11e48b3f',
+ 'title': '花千骨 DVD版',
+ 'description': 'md5:a1ae6f5618571bbeb5c9821f9c81b558',
+ },
+ 'playlist_count': 50,
+ }, {
+ # Episode number not starting from 1
+ 'url': 'http://list.youku.com/show/id_zefbfbd70efbfbd780bef.html',
+ 'info_dict': {
+ 'id': 'zefbfbd70efbfbd780bef',
+ 'title': '超级飞侠3',
+ 'description': 'md5:275715156abebe5ccc2a1992e9d56b98',
+ },
+ 'playlist_count': 24,
+ }, {
+ # Ongoing playlist. The initial page is the last one
+ 'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html',
+ 'only_matching': True,
+ }, {
+ # No data-id value.
+ 'url': 'http://list.youku.com/show/id_zefbfbd61237fefbfbdef.html',
+ 'only_matching': True,
+ }, {
+ # Unexpected number of reload_id entries.
+ 'url': 'http://list.youku.com/show/id_z20eb4acaf5c211e3b2ad.html',
+ 'only_matching': True,
+ }]
+
+ def _extract_entries(self, playlist_data_url, show_id, note, query):
+ query['callback'] = 'cb'
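+ # The endpoint replies with JSONP; strip_jsonp removes the cb(...)
+ # wrapper so the payload can be parsed as JSON.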
+ playlist_data = self._download_json(
+ playlist_data_url, show_id, query=query, note=note,
+ transform_source=lambda s: js_to_json(strip_jsonp(s))).get('html')
+ if playlist_data is None:
+ return [None, None]
+ drama_list = (get_element_by_class('p-drama-grid', playlist_data)
+ or get_element_by_class('p-drama-half-row', playlist_data))
+ if drama_list is None:
+ raise ExtractorError('No episodes found')
+ video_urls = re.findall(r'<a[^>]+href="([^"]+)"', drama_list)
+ return playlist_data, [
+ self.url_result(self._proto_relative_url(video_url, 'http:'), YoukuIE.ie_key())
+ for video_url in video_urls]
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ webpage = self._download_webpage(url, show_id)
+
+ entries = []
+ page_config = self._parse_json(self._search_regex(
+ r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'),
+ show_id, transform_source=js_to_json)
+ first_page, initial_entries = self._extract_entries(
+ 'http://list.youku.com/show/module', show_id,
+ note='Downloading initial playlist data page',
+ query={
+ 'id': page_config['showid'],
+ 'tab': 'showInfo',
+ })
+ first_page_reload_id = self._html_search_regex(
+ r'<div[^>]+id="(reload_\d+)', first_page, 'first page reload id')
+ # The first reload_id has the same items as first_page
+ reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page)
+ entries.extend(initial_entries)
+ for idx, reload_id in enumerate(reload_ids):
+ if reload_id == first_page_reload_id:
+ continue
+ _, new_entries = self._extract_entries(
+ 'http://list.youku.com/show/episode', show_id,
+ note='Downloading playlist data page %d' % (idx + 1),
+ query={
+ 'id': page_config['showid'],
+ 'stage': reload_id,
+ })
+ if new_entries is not None:
+ entries.extend(new_entries)
+ desc = self._html_search_meta('description', webpage, fatal=False)
+ playlist_title = desc.split(',')[0] if desc else None
+ detail_li = get_element_by_class('p-intro', webpage)
+ playlist_description = get_element_by_class(
+ 'intro-more', detail_li) if detail_li else None
+
+ return self.playlist_result(
+ entries, show_id, playlist_title, playlist_description)
diff --git a/hypervideo_dl/extractor/younow.py b/hypervideo_dl/extractor/younow.py
new file mode 100644
index 0000000..04dbc87
--- /dev/null
+++ b/hypervideo_dl/extractor/younow.py
@@ -0,0 +1,202 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+)
+
+CDN_API_BASE = 'https://cdn.younow.com/php/api'
+MOMENT_URL_FORMAT = '%s/moment/fetch/id=%%s' % CDN_API_BASE
+
+
+class YouNowLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.younow.com/AmandaPadeezy',
+ 'info_dict': {
+ 'id': 'AmandaPadeezy',
+ 'ext': 'mp4',
+ 'is_live': True,
+ 'title': 'March 26, 2017',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'tags': ['girls'],
+ 'categories': ['girls'],
+ 'uploader': 'AmandaPadeezy',
+ 'uploader_id': '6716501',
+ 'uploader_url': 'https://www.younow.com/AmandaPadeezy',
+ 'creator': 'AmandaPadeezy',
+ },
+ 'skip': True,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if YouNowChannelIE.suitable(url) or YouNowMomentIE.suitable(url)
+ else super(YouNowLiveIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+
+ data = self._download_json(
+ 'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
+ % username, username)
+
+ if data.get('errorCode') != 0:
+ raise ExtractorError(data['errorMsg'], expected=True)
+
+ uploader = try_get(
+ data, lambda x: x['user']['profileUrlString'],
+ compat_str) or username
+
+ return {
+ 'id': uploader,
+ 'is_live': True,
+ 'title': self._live_title(uploader),
+ 'thumbnail': data.get('awsUrl'),
+ 'tags': data.get('tags'),
+ 'categories': data.get('tags'),
+ 'uploader': uploader,
+ 'uploader_id': data.get('userId'),
+ 'uploader_url': 'https://www.younow.com/%s' % username,
+ 'creator': uploader,
+ 'view_count': int_or_none(data.get('viewers')),
+ 'like_count': int_or_none(data.get('likes')),
+ 'formats': [{
+ 'url': '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s'
+ % (CDN_API_BASE, data['broadcastId'], data['userId']),
+ 'ext': 'mp4',
+ 'protocol': 'm3u8',
+ }],
+ }
+
+
+def _extract_moment(item, fatal=True):
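+ # Build an info dict for a single YouNow "moment" (a short recorded
+ # clip); shared by the channel and single-moment extractors below.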
+ moment_id = item.get('momentId')
+ if not moment_id:
+ if not fatal:
+ return
+ raise ExtractorError('Unable to extract moment id')
+
+ moment_id = compat_str(moment_id)
+
+ title = item.get('text')
+ if not title:
+ title = 'YouNow %s' % (
+ item.get('momentType') or item.get('titleType') or 'moment')
+
+ uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
+ uploader_id = try_get(item, lambda x: x['owner']['userId'])
+ uploader_url = 'https://www.younow.com/%s' % uploader if uploader else None
+
+ entry = {
+ 'extractor_key': 'YouNowMoment',
+ 'id': moment_id,
+ 'title': title,
+ 'view_count': int_or_none(item.get('views')),
+ 'like_count': int_or_none(item.get('likes')),
+ 'timestamp': int_or_none(item.get('created')),
+ 'creator': uploader,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'uploader_url': uploader_url,
+ 'formats': [{
+ 'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8'
+ % (moment_id, moment_id),
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ }],
+ }
+
+ return entry
+
+
+class YouNowChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel'
+ _TEST = {
+ 'url': 'https://www.younow.com/its_Kateee_/channel',
+ 'info_dict': {
+ 'id': '14629760',
+ 'title': 'its_Kateee_ moments'
+ },
+ 'playlist_mincount': 8,
+ }
+
+ def _entries(self, username, channel_id):
+ created_before = 0
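+ # 0 requests the newest moments; each subsequent page passes the
+ # 'created' timestamp of the last item seen on the previous page.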
+ for page_num in itertools.count(1):
+ if created_before is None:
+ break
+ info = self._download_json(
+ '%s/moment/profile/channelId=%s/createdBefore=%d/records=20'
+ % (CDN_API_BASE, channel_id, created_before), username,
+ note='Downloading moments page %d' % page_num)
+ items = info.get('items')
+ if not items or not isinstance(items, list):
+ break
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ item_type = item.get('type')
+ if item_type == 'moment':
+ entry = _extract_moment(item, fatal=False)
+ if entry:
+ yield entry
+ elif item_type == 'collection':
+ moments = item.get('momentsIds')
+ if isinstance(moments, list):
+ for moment_id in moments:
+ m = self._download_json(
+ MOMENT_URL_FORMAT % moment_id, username,
+ note='Downloading %s moment JSON' % moment_id,
+ fatal=False)
+ if m and isinstance(m, dict) and m.get('item'):
+ entry = _extract_moment(m['item'])
+ if entry:
+ yield entry
+ created_before = int_or_none(item.get('created'))
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+ channel_id = compat_str(self._download_json(
+ 'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
+ % username, username, note='Downloading user information')['userId'])
+ return self.playlist_result(
+ self._entries(username, channel_id), channel_id,
+ '%s moments' % username)
+
+
+class YouNowMomentIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m',
+ 'md5': 'a30c70eadb9fb39a1aa3c8c0d22a0807',
+ 'info_dict': {
+ 'id': '20712117',
+ 'ext': 'mp4',
+ 'title': 'YouNow capture',
+ 'view_count': int,
+ 'like_count': int,
+ 'timestamp': 1490432040,
+ 'upload_date': '20170325',
+ 'uploader': 'GABO...',
+ 'uploader_id': 35917228,
+ },
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if YouNowChannelIE.suitable(url)
+ else super(YouNowMomentIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ item = self._download_json(MOMENT_URL_FORMAT % video_id, video_id)
+ return _extract_moment(item['item'])
diff --git a/hypervideo_dl/extractor/youporn.py b/hypervideo_dl/extractor/youporn.py
new file mode 100644
index 0000000..7084d3d
--- /dev/null
+++ b/hypervideo_dl/extractor/youporn.py
@@ -0,0 +1,184 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ str_to_int,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class YouPornIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _TESTS = [{
+ 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+ 'md5': '3744d24c50438cf5b6f6d59feb5055c2',
+ 'info_dict': {
+ 'id': '505835',
+ 'display_id': 'sex-ed-is-it-safe-to-masturbate-daily',
+ 'ext': 'mp4',
+ 'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
+ 'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 210,
+ 'uploader': 'Ask Dan And Jennifer',
+ 'upload_date': '20101217',
+ 'average_rating': int,
+ 'view_count': int,
+ 'categories': list,
+ 'tags': list,
+ 'age_limit': 18,
+ },
+ 'skip': 'This video has been disabled',
+ }, {
+ # Unknown uploader
+ 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
+ 'info_dict': {
+ 'id': '561726',
+ 'display_id': 'big-tits-awesome-brunette-on-amazing-webcam-show',
+ 'ext': 'mp4',
+ 'title': 'Big Tits Awesome Brunette On amazing webcam show',
+ 'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Unknown',
+ 'upload_date': '20110418',
+ 'average_rating': int,
+ 'view_count': int,
+ 'categories': list,
+ 'tags': list,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': '404',
+ }, {
+ 'url': 'https://www.youporn.com/embed/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.youporn.com/watch/505835',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)',
+ webpage)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ definitions = self._download_json(
+ 'https://www.youporn.com/api/video/media_definitions/%s/' % video_id,
+ display_id)
+
+ formats = []
+ for definition in definitions:
+ if not isinstance(definition, dict):
+ continue
+ video_url = url_or_none(definition.get('videoUrl'))
+ if not video_url:
+ continue
+ f = {
+ 'url': video_url,
+ 'filesize': int_or_none(definition.get('videoSize')),
+ }
+ height = int_or_none(definition.get('quality'))
+ # Video URL's path looks like this:
+ # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+ # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+ # /videos/201703/11/109285532/1080P_4000K_109285532.mp4
+ # We will benefit from it by extracting some metadata
+ mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
+ if mobj:
+ if not height:
+ height = int(mobj.group('height'))
+ bitrate = int(mobj.group('bitrate'))
+ f.update({
+ 'format_id': '%dp-%dk' % (height, bitrate),
+ 'tbr': bitrate,
+ })
+ f['height'] = height
+ formats.append(f)
+ self._sort_formats(formats)
+
+ webpage = self._download_webpage(
+ 'http://www.youporn.com/watch/%s' % video_id, display_id,
+ headers={'Cookie': 'age_verified=1'})
+
+ title = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
+ webpage, 'title', default=None) or self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'title', webpage, fatal=True)
+
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>',
+ webpage, 'description',
+ default=None) or self._og_search_description(
+ webpage, default=None)
+ thumbnail = self._search_regex(
+ r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
+ webpage, 'thumbnail', fatal=False, group='thumbnail')
+ duration = int_or_none(self._html_search_meta(
+ 'video:duration', webpage, 'duration', fatal=False))
+
+ uploader = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
+ webpage, 'uploader', fatal=False)
+ upload_date = unified_strdate(self._html_search_regex(
+ [r'UPLOADED:\s*<span>([^<]+)',
+ r'Date\s+[Aa]dded:\s*<span>([^<]+)',
+ r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
+ webpage, 'upload date', fatal=False))
+
+ age_limit = self._rta_search(webpage)
+
+ view_count = None
+ views = self._search_regex(
+ r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage,
+ 'views', default=None)
+ if views:
+ view_count = str_to_int(extract_attributes(views).get('data-value'))
+ comment_count = str_to_int(self._search_regex(
+ r'>All [Cc]omments? \(([\d,.]+)\)',
+ webpage, 'comment count', default=None))
+
+ def extract_tag_box(regex, title):
+ tag_box = self._search_regex(regex, webpage, title, default=None)
+ if not tag_box:
+ return []
+ return re.findall(r'<a[^>]+href=[^>]+>([^<]+)', tag_box)
+
+ categories = extract_tag_box(
+ r'(?s)Categories:.*?</[^>]+>(.+?)</div>', 'categories')
+ tags = extract_tag_box(
+ r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
+ 'tags')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ 'tags': tags,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/yourporn.py b/hypervideo_dl/extractor/yourporn.py
new file mode 100644
index 0000000..9834749
--- /dev/null
+++ b/hypervideo_dl/extractor/yourporn.py
@@ -0,0 +1,67 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ parse_duration,
+ urljoin,
+)
+
+
+class YourPornIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?sxyprn\.com/post/(?P<id>[^/?#&.]+)'
+ _TESTS = [{
+ 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
+ 'md5': '6f8682b6464033d87acaa7a8ff0c092e',
+ 'info_dict': {
+ 'id': '57ffcb2e1179b',
+ 'ext': 'mp4',
+ 'title': 'md5:c9f43630bd968267672651ba905a7d35',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 165,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ parts = self._parse_json(
+ self._search_regex(
+ r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
+ group='data'),
+ video_id)[video_id].split('/')
+
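+ # De-obfuscate the video URL: subtract the digit sum of path segments
+ # 6 and 7 from segment 5 and append '8' to segment 1 (observed site
+ # behaviour, liable to change).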
+ num = 0
+ for c in parts[6] + parts[7]:
+ if c.isnumeric():
+ num += int(c)
+ parts[5] = compat_str(int(parts[5]) - num)
+ parts[1] += '8'
+ video_url = urljoin(url, '/'.join(parts))
+
+ title = (self._search_regex(
+ r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
+ default=None) or self._og_search_description(webpage)).strip()
+ thumbnail = self._og_search_thumbnail(webpage)
+ duration = parse_duration(self._search_regex(
+ r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration',
+ default=None))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': 18,
+ 'ext': 'mp4',
+ }
diff --git a/hypervideo_dl/extractor/yourupload.py b/hypervideo_dl/extractor/yourupload.py
new file mode 100644
index 0000000..9fa7728
--- /dev/null
+++ b/hypervideo_dl/extractor/yourupload.py
@@ -0,0 +1,46 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import urljoin
+
+
+class YourUploadIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P<id>[A-Za-z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://yourupload.com/watch/14i14h',
+ 'md5': '5e2c63385454c557f97c4c4131a393cd',
+ 'info_dict': {
+ 'id': '14i14h',
+ 'ext': 'mp4',
+ 'title': 'BigBuckBunny_320x180.mp4',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ }
+ }, {
+ 'url': 'http://www.yourupload.com/embed/14i14h',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://embed.yourupload.com/14i14h',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ embed_url = 'http://www.yourupload.com/embed/%s' % video_id
+
+ webpage = self._download_webpage(embed_url, video_id)
+
+ title = self._og_search_title(webpage)
+ video_url = urljoin(embed_url, self._og_search_video_url(webpage))
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ 'http_headers': {
+ 'Referer': embed_url,
+ },
+ }
diff --git a/hypervideo_dl/extractor/youtube.py b/hypervideo_dl/extractor/youtube.py
new file mode 100644
index 0000000..2272a02
--- /dev/null
+++ b/hypervideo_dl/extractor/youtube.py
@@ -0,0 +1,3239 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import itertools
+import json
+import os.path
+import random
+import re
+import traceback
+
+from .common import InfoExtractor, SearchInfoExtractor
+from ..compat import (
+ compat_chr,
+ compat_HTTPError,
+ compat_parse_qs,
+ compat_str,
+ compat_urllib_parse_unquote_plus,
+ compat_urllib_parse_urlencode,
+ compat_urllib_parse_urlparse,
+ compat_urlparse,
+)
+from ..jsinterp import JSInterpreter
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ dict_get,
+ float_or_none,
+ int_or_none,
+ mimetype2ext,
+ parse_codecs,
+ parse_duration,
+ qualities,
+ remove_start,
+ smuggle_url,
+ str_or_none,
+ str_to_int,
+ try_get,
+ unescapeHTML,
+ unified_strdate,
+ unsmuggle_url,
+ update_url_query,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+def parse_qs(url):
+ return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+
+
+class YoutubeBaseInfoExtractor(InfoExtractor):
+ """Provide base functions for Youtube extractors"""
+ _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
+ _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
+
+ _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
+ _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
+ _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
+
+ _NETRC_MACHINE = 'youtube'
+ # If True, raise an error when no login info is provided
+ _LOGIN_REQUIRED = False
+
+ _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'
+
+ def _login(self):
+ """
+ Attempt to log in to YouTube.
+ True is returned if successful or skipped.
+ False is returned if login failed.
+
+ If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
+ """
+ username, password = self._get_login_info()
+ # No authentication to be performed
+ if username is None:
+ if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
+ raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+ return True
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None,
+ note='Downloading login page',
+ errnote='unable to fetch login page', fatal=False)
+ if login_page is False:
+ return
+
+ login_form = self._hidden_inputs(login_page)
+
+ def req(url, f_req, note, errnote):
+ data = login_form.copy()
+ data.update({
+ 'pstMsg': 1,
+ 'checkConnection': 'youtube',
+ 'checkedDomains': 'youtube',
+ 'hl': 'en',
+ 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
+ 'f.req': json.dumps(f_req),
+ 'flowName': 'GlifWebSignIn',
+ 'flowEntry': 'ServiceLogin',
+ # TODO: reverse actual botguard identifier generation algo
+ 'bgRequest': '["identifier",""]',
+ })
+ return self._download_json(
+ url, None, note=note, errnote=errnote,
+ transform_source=lambda s: re.sub(r'^[^[]*', '', s),
+ fatal=False,
+ data=urlencode_postdata(data), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
+ 'Google-Accounts-XSRF': 1,
+ })
+
+ def warn(message):
+ self._downloader.report_warning(message)
+
+ lookup_req = [
+ username,
+ None, [], None, 'US', None, None, 2, False, True,
+ [
+ None, None,
+ [2, 1, None, 1,
+ 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
+ None, [], 4],
+ 1, [None, None, []], None, None, None, True
+ ],
+ username,
+ ]
+
+ lookup_results = req(
+ self._LOOKUP_URL, lookup_req,
+ 'Looking up account info', 'Unable to look up account info')
+
+ if lookup_results is False:
+ return False
+
+ user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
+ if not user_hash:
+ warn('Unable to extract user hash')
+ return False
+
+ challenge_req = [
+ user_hash,
+ None, 1, None, [1, None, None, None, [password, None, True]],
+ [
+ None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
+ 1, [None, None, []], None, None, None, True
+ ]]
+
+ challenge_results = req(
+ self._CHALLENGE_URL, challenge_req,
+ 'Logging in', 'Unable to log in')
+
+ if challenge_results is False:
+ return
+
+ login_res = try_get(challenge_results, lambda x: x[0][5], list)
+ if login_res:
+ login_msg = try_get(login_res, lambda x: x[5], compat_str)
+ warn(
+ 'Unable to log in: %s' % ('Invalid password'
+ if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
+ return False
+
+ res = try_get(challenge_results, lambda x: x[0][-1], list)
+ if not res:
+ warn('Unable to extract result entry')
+ return False
+
+ login_challenge = try_get(res, lambda x: x[0][0], list)
+ if login_challenge:
+ challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
+ if challenge_str == 'TWO_STEP_VERIFICATION':
+ # SEND_SUCCESS - TFA code has been successfully sent to phone
+ # QUOTA_EXCEEDED - reached the limit of TFA codes
+ status = try_get(login_challenge, lambda x: x[5], compat_str)
+ if status == 'QUOTA_EXCEEDED':
+ warn('Exceeded the limit of TFA codes, try later')
+ return False
+
+ tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
+ if not tl:
+ warn('Unable to extract TL')
+ return False
+
+ tfa_code = self._get_tfa_info('2-step verification code')
+
+ if not tfa_code:
+ warn(
+ 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>. '
+ '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
+ return False
+
+ tfa_code = remove_start(tfa_code, 'G-')
+
+ tfa_req = [
+ user_hash, None, 2, None,
+ [
+ 9, None, None, None, None, None, None, None,
+ [None, tfa_code, True, 2]
+ ]]
+
+ tfa_results = req(
+ self._TFA_URL.format(tl), tfa_req,
+ 'Submitting TFA code', 'Unable to submit TFA code')
+
+ if tfa_results is False:
+ return False
+
+ tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
+ if tfa_res:
+ tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
+ warn(
+ 'Unable to finish TFA: %s' % ('Invalid TFA code'
+ if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
+ return False
+
+ check_cookie_url = try_get(
+ tfa_results, lambda x: x[0][-1][2], compat_str)
+ else:
+ CHALLENGES = {
+ 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
+ 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
+ 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
+ }
+ challenge = CHALLENGES.get(
+ challenge_str,
+ '%s returned error %s.' % (self.IE_NAME, challenge_str))
+ warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
+ return False
+ else:
+ check_cookie_url = try_get(res, lambda x: x[2], compat_str)
+
+ if not check_cookie_url:
+ warn('Unable to extract CheckCookie URL')
+ return False
+
+ check_cookie_results = self._download_webpage(
+ check_cookie_url, None, 'Checking cookie', fatal=False)
+
+ if check_cookie_results is False:
+ return False
+
+ if 'https://myaccount.google.com/' not in check_cookie_results:
+ warn('Unable to log in')
+ return False
+
+ return True
+
+ def _initialize_consent(self):
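+ # Pre-set a CONSENT cookie so that requests from EU IPs are not
+ # redirected to consent.youtube.com; logged-in sessions and
+ # already-given consent are left untouched.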
+ cookies = self._get_cookies('https://www.youtube.com/')
+ if cookies.get('__Secure-3PSID'):
+ return
+ consent_id = None
+ consent = cookies.get('CONSENT')
+ if consent:
+ if 'YES' in consent.value:
+ return
+ consent_id = self._search_regex(
+ r'PENDING\+(\d+)', consent.value, 'consent', default=None)
+ if not consent_id:
+ consent_id = random.randint(100, 999)
+ self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
+
+ def _real_initialize(self):
+ self._initialize_consent()
+ if self._downloader is None:
+ return
+ if not self._login():
+ return
+
+ _DEFAULT_API_DATA = {
+ 'context': {
+ 'client': {
+ 'clientName': 'WEB',
+ 'clientVersion': '2.20201021.03.00',
+ }
+ },
+ }
+
+ _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
+ _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
+ _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
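+ # The boundary pattern reduces false matches on '};' sequences inside
+ # the JSON (see the CHqg6qOn4no test below) by requiring the object
+ # literal to be followed by a known terminator.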
+
+ def _call_api(self, ep, query, video_id, fatal=True):
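+ # Query the InnerTube API; the key passed below is the public one
+ # embedded in the YouTube web client, not an account credential.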
+ data = self._DEFAULT_API_DATA.copy()
+ data.update(query)
+
+ return self._download_json(
+ 'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
+ note='Downloading API JSON', errnote='Unable to download API page',
+ data=json.dumps(data).encode('utf8'), fatal=fatal,
+ headers={'content-type': 'application/json'},
+ query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
+
+ def _extract_yt_initial_data(self, video_id, webpage):
+ return self._parse_json(
+ self._search_regex(
+ (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
+ self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
+ video_id)
+
+ def _extract_ytcfg(self, video_id, webpage):
+ return self._parse_json(
+ self._search_regex(
+ r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
+ default='{}'), video_id, fatal=False) or {}
+
+ def _extract_video(self, renderer):
+ video_id = renderer['videoId']
+ title = try_get(
+ renderer,
+ (lambda x: x['title']['runs'][0]['text'],
+ lambda x: x['title']['simpleText']), compat_str)
+ description = try_get(
+ renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
+ compat_str)
+ duration = parse_duration(try_get(
+ renderer, lambda x: x['lengthText']['simpleText'], compat_str))
+ view_count_text = try_get(
+ renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
+ view_count = str_to_int(self._search_regex(
+ r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
+ 'view count', default=None))
+ uploader = try_get(
+ renderer,
+ (lambda x: x['ownerText']['runs'][0]['text'],
+ lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
+ return {
+ '_type': 'url',
+ 'ie_key': YoutubeIE.ie_key(),
+ 'id': video_id,
+ 'url': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'uploader': uploader,
+ }
+
+
+class YoutubeIE(YoutubeBaseInfoExtractor):
+ IE_DESC = 'YouTube.com'
+ _INVIDIOUS_SITES = (
+ # invidious-redirect websites
+ r'(?:www\.)?redirect\.invidious\.io',
+ r'(?:(?:www|dev)\.)?invidio\.us',
+ # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
+ r'(?:(?:www|no)\.)?invidiou\.sh',
+ r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
+ r'(?:www\.)?invidious\.kabi\.tk',
+ r'(?:www\.)?invidious\.13ad\.de',
+ r'(?:www\.)?invidious\.mastodon\.host',
+ r'(?:www\.)?invidious\.zapashcanon\.fr',
+ r'(?:www\.)?invidious\.kavin\.rocks',
+ r'(?:www\.)?invidious\.tinfoil-hat\.net',
+ r'(?:www\.)?invidious\.himiko\.cloud',
+ r'(?:www\.)?invidious\.reallyancient\.tech',
+ r'(?:www\.)?invidious\.tube',
+ r'(?:www\.)?invidiou\.site',
+ r'(?:www\.)?invidious\.site',
+ r'(?:www\.)?invidious\.xyz',
+ r'(?:www\.)?invidious\.nixnet\.xyz',
+ r'(?:www\.)?invidious\.048596\.xyz',
+ r'(?:www\.)?invidious\.drycat\.fr',
+ r'(?:www\.)?inv\.skyn3t\.in',
+ r'(?:www\.)?tube\.poal\.co',
+ r'(?:www\.)?tube\.connect\.cafe',
+ r'(?:www\.)?vid\.wxzm\.sx',
+ r'(?:www\.)?vid\.mint\.lgbt',
+ r'(?:www\.)?vid\.puffyan\.us',
+ r'(?:www\.)?yewtu\.be',
+ r'(?:www\.)?yt\.elukerio\.org',
+ r'(?:www\.)?yt\.lelux\.fi',
+ r'(?:www\.)?invidious\.ggc-project\.de',
+ r'(?:www\.)?yt\.maisputain\.ovh',
+ r'(?:www\.)?ytprivate\.com',
+ r'(?:www\.)?invidious\.toot\.koeln',
+ r'(?:www\.)?invidious\.fdn\.fr',
+ r'(?:www\.)?watch\.nettohikari\.com',
+ r'(?:www\.)?kgg2m7yk5aybusll\.onion',
+ r'(?:www\.)?qklhadlycap4cnod\.onion',
+ r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
+ r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
+ r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
+ r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
+ r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
+ r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
+ )
+ _VALID_URL = r"""(?x)^
+ (
+ (?:https?://|//) # http(s):// or protocol-independent URL
+ (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
+ (?:www\.)?deturl\.com/www\.youtube\.com|
+ (?:www\.)?pwnyoutube\.com|
+ (?:www\.)?hooktube\.com|
+ (?:www\.)?yourepeat\.com|
+ tube\.majestyc\.net|
+ %(invidious)s|
+ youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
+ (?:.*?\#/)? # handle anchor (#/) redirect urls
+ (?: # the various things that can precede the ID:
+ (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
+ |(?: # or the v= param in all its forms
+ (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+ (?:\?|\#!?) # the params delimiter ? or # or #!
+ (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
+ v=
+ )
+ ))
+ |(?:
+ youtu\.be| # just youtu.be/xxxx
+ vid\.plus| # or vid.plus/xxxx
+ zwearz\.com/watch| # or zwearz.com/watch/xxxx
+ %(invidious)s
+ )/
+ |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
+ )
+ )? # all until now is optional -> you can pass the naked ID
+ (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
+ (?(1).+)? # if we found the ID, everything can follow
+ $""" % {
+ 'invidious': '|'.join(_INVIDIOUS_SITES),
+ }
+ _PLAYER_INFO_RE = (
+ r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
+ r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
+ r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
+ )
+ _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
+
+ _GEO_BYPASS = False
+
+ IE_NAME = 'youtube'
+ _TESTS = [
+ {
+ 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
+ 'info_dict': {
+ 'id': 'BaW_jenozKc',
+ 'ext': 'mp4',
+ 'title': 'hypervideo test video "\'/\\ä↭𝕐',
+ 'uploader': 'Philipp Hagemeister',
+ 'uploader_id': 'phihag',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+ 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
+ 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
+ 'upload_date': '20121002',
+ 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for hypervideo.\n\nFor more information, contact phihag@phihag.de .',
+ 'categories': ['Science & Technology'],
+ 'tags': ['hypervideo'],
+ 'duration': 10,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'start_time': 1,
+ 'end_time': 9,
+ }
+ },
+ {
+ 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
+ 'note': 'Embed-only video (#1746)',
+ 'info_dict': {
+ 'id': 'yZIXLfi8CZQ',
+ 'ext': 'mp4',
+ 'upload_date': '20120608',
+ 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
+ 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
+ 'uploader': 'SET India',
+ 'uploader_id': 'setindia',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
+ 'age_limit': 18,
+ },
+ 'skip': 'Private video',
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
+ 'note': 'Use the first video ID in the URL',
+ 'info_dict': {
+ 'id': 'BaW_jenozKc',
+ 'ext': 'mp4',
+ 'title': 'hypervideo test video "\'/\\ä↭𝕐',
+ 'uploader': 'Philipp Hagemeister',
+ 'uploader_id': 'phihag',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+ 'upload_date': '20121002',
+ 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for hypervideo.\n\nFor more information, contact phihag@phihag.de .',
+ 'categories': ['Science & Technology'],
+ 'tags': ['hypervideo'],
+ 'duration': 10,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
+ 'note': '256k DASH audio (format 141) via DASH manifest',
+ 'info_dict': {
+ 'id': 'a9LDPn-MO4I',
+ 'ext': 'm4a',
+ 'upload_date': '20121002',
+ 'uploader_id': '8KVIDEO',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
+ 'description': '',
+ 'uploader': '8KVIDEO',
+ 'title': 'UHDTV TEST 8K VIDEO.mp4'
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '141',
+ },
+ 'skip': 'format 141 not served anymore',
+ },
+ # DASH manifest with encrypted signature
+ {
+ 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
+ 'info_dict': {
+ 'id': 'IB3lcPjvWLA',
+ 'ext': 'm4a',
+ 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
+ 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
+ 'duration': 244,
+ 'uploader': 'AfrojackVEVO',
+ 'uploader_id': 'AfrojackVEVO',
+ 'upload_date': '20131011',
+ 'abr': 129.495,
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '141/bestaudio[ext=m4a]',
+ },
+ },
+ # Controversy video
+ {
+ 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
+ 'info_dict': {
+ 'id': 'T4XJQO3qol8',
+ 'ext': 'mp4',
+ 'duration': 219,
+ 'upload_date': '20100909',
+ 'uploader': 'Amazing Atheist',
+ 'uploader_id': 'TheAmazingAtheist',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
+ 'title': 'Burning Everyone\'s Koran',
+ 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
+ }
+ },
+ # Normal age-gate video (No vevo, embed allowed), available via embed page
+ {
+ 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
+ 'info_dict': {
+ 'id': 'HtVdAasjOgU',
+ 'ext': 'mp4',
+ 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
+ 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
+ 'duration': 142,
+ 'uploader': 'The Witcher',
+ 'uploader_id': 'WitcherGame',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
+ 'upload_date': '20140605',
+ 'age_limit': 18,
+ },
+ },
+ {
+ # Age-gated video only available with authentication (unavailable
+ # via embed page workaround)
+ 'url': 'XgnwCQzjau8',
+ 'only_matching': True,
+ },
+ # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
+ # YouTube Red ad is not captured for creator
+ {
+ 'url': '__2ABJjxzNo',
+ 'info_dict': {
+ 'id': '__2ABJjxzNo',
+ 'ext': 'mp4',
+ 'duration': 266,
+ 'upload_date': '20100430',
+ 'uploader_id': 'deadmau5',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
+ 'creator': 'deadmau5',
+ 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
+ 'uploader': 'deadmau5',
+ 'title': 'Deadmau5 - Some Chords (HD)',
+ 'alt_title': 'Some Chords',
+ },
+ 'expected_warnings': [
+ 'DASH manifest missing',
+ ]
+ },
+ # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
+ {
+ 'url': 'lqQg6PlCWgI',
+ 'info_dict': {
+ 'id': 'lqQg6PlCWgI',
+ 'ext': 'mp4',
+ 'duration': 6085,
+ 'upload_date': '20150827',
+ 'uploader_id': 'olympic',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
+ 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+ 'uploader': 'Olympic',
+ 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ }
+ },
+ # Non-square pixels
+ {
+ 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
+ 'info_dict': {
+ 'id': '_b-2C3KPAM0',
+ 'ext': 'mp4',
+ 'stretched_ratio': 16 / 9.,
+ 'duration': 85,
+ 'upload_date': '20110310',
+ 'uploader_id': 'AllenMeow',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
+ 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
+ 'uploader': '孫ᄋᄅ',
+ 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
+ },
+ },
+ # url_encoded_fmt_stream_map is empty string
+ {
+ 'url': 'qEJwOuvDf7I',
+ 'info_dict': {
+ 'id': 'qEJwOuvDf7I',
+ 'ext': 'webm',
+ 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
+ 'description': '',
+ 'upload_date': '20150404',
+ 'uploader_id': 'spbelect',
+ 'uploader': 'Наблюдатели Петербурга',
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ },
+ 'skip': 'This live event has ended.',
+ },
+ # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
+ {
+ 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
+ 'info_dict': {
+ 'id': 'FIl7x6_3R5Y',
+ 'ext': 'webm',
+ 'title': 'md5:7b81415841e02ecd4313668cde88737a',
+ 'description': 'md5:116377fd2963b81ec4ce64b542173306',
+ 'duration': 220,
+ 'upload_date': '20150625',
+ 'uploader_id': 'dorappi2000',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
+ 'uploader': 'dorappi2000',
+ 'formats': 'mincount:31',
+ },
+ 'skip': 'not actual anymore',
+ },
+ # DASH manifest with segment_list
+ {
+ 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
+ 'md5': '8ce563a1d667b599d21064e982ab9e31',
+ 'info_dict': {
+ 'id': 'CsmdDsKjzN8',
+ 'ext': 'mp4',
+ 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
+ 'uploader': 'Airtek',
+ 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
+ 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
+ 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '135', # bestvideo
+ },
+ 'skip': 'This live event has ended.',
+ },
+ {
+ # Multifeed videos (multiple cameras), URL is for Main Camera
+ 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
+ 'info_dict': {
+ 'id': 'jvGDaLqkpTg',
+ 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
+ 'description': 'md5:e03b909557865076822aa169218d6a5d',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'jvGDaLqkpTg',
+ 'ext': 'mp4',
+ 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
+ 'description': 'md5:e03b909557865076822aa169218d6a5d',
+ 'duration': 10643,
+ 'upload_date': '20161111',
+ 'uploader': 'Team PGP',
+ 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
+ },
+ }, {
+ 'info_dict': {
+ 'id': '3AKt1R1aDnw',
+ 'ext': 'mp4',
+ 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
+ 'description': 'md5:e03b909557865076822aa169218d6a5d',
+ 'duration': 10991,
+ 'upload_date': '20161111',
+ 'uploader': 'Team PGP',
+ 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'RtAMM00gpVc',
+ 'ext': 'mp4',
+ 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
+ 'description': 'md5:e03b909557865076822aa169218d6a5d',
+ 'duration': 10995,
+ 'upload_date': '20161111',
+ 'uploader': 'Team PGP',
+ 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
+ },
+ }, {
+ 'info_dict': {
+ 'id': '6N2fdlP3C5U',
+ 'ext': 'mp4',
+ 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
+ 'description': 'md5:e03b909557865076822aa169218d6a5d',
+ 'duration': 10990,
+ 'upload_date': '20161111',
+ 'uploader': 'Team PGP',
+ 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
+ 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
+ 'info_dict': {
+ 'id': 'gVfLd0zydlo',
+ 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
+ },
+ 'playlist_count': 2,
+ 'skip': 'Not multifeed anymore',
+ },
+ {
+ 'url': 'https://vid.plus/FlRa-iH7PGw',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
+ 'only_matching': True,
+ },
+ {
+ # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
+ # Also tests cut-off URL expansion in video description (see
+ # https://github.com/ytdl-org/youtube-dl/issues/1892,
+ # https://github.com/ytdl-org/youtube-dl/issues/8164)
+ 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
+ 'info_dict': {
+ 'id': 'lsguqyKfVQg',
+ 'ext': 'mp4',
+ 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
+ 'alt_title': 'Dark Walk - Position Music',
+ 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
+ 'duration': 133,
+ 'upload_date': '20151119',
+ 'uploader_id': 'IronSoulElf',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
+ 'uploader': 'IronSoulElf',
+ 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
+ 'track': 'Dark Walk - Position Music',
+ 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
+ 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
+ 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
+ 'only_matching': True,
+ },
+ {
+ # Video with yt:stretch=17:0
+ 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
+ 'info_dict': {
+ 'id': 'Q39EVAstoRM',
+ 'ext': 'mp4',
+ 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
+ 'description': 'md5:ee18a25c350637c8faff806845bddee9',
+ 'upload_date': '20151107',
+ 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
+ 'uploader': 'CH GAMER DROID',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video does not exist.',
+ },
+ {
+ # Video with incomplete 'yt:stretch=16:'
+ 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
+ 'only_matching': True,
+ },
+ {
+ # Video licensed under Creative Commons
+ 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
+ 'info_dict': {
+ 'id': 'M4gD1WSo5mA',
+ 'ext': 'mp4',
+ 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
+ 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
+ 'duration': 721,
+ 'upload_date': '20150127',
+ 'uploader_id': 'BerkmanCenter',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
+ 'uploader': 'The Berkman Klein Center for Internet & Society',
+ 'license': 'Creative Commons Attribution license (reuse allowed)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Channel-like uploader_url
+ 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
+ 'info_dict': {
+ 'id': 'eQcmzGIKrzg',
+ 'ext': 'mp4',
+ 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
+ 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
+ 'duration': 4060,
+ 'upload_date': '20151119',
+ 'uploader': 'Bernie Sanders',
+ 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
+ 'license': 'Creative Commons Attribution license (reuse allowed)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
+ 'only_matching': True,
+ },
+ {
+ # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
+ 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
+ 'only_matching': True,
+ },
+ {
+ # Rental video preview
+ 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
+ 'info_dict': {
+ 'id': 'uGpuVWrhIzE',
+ 'ext': 'mp4',
+ 'title': 'Piku - Trailer',
+ 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
+ 'upload_date': '20150811',
+ 'uploader': 'FlixMatrix',
+ 'uploader_id': 'FlixMatrixKaravan',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
+ 'license': 'Standard YouTube License',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video is not available.',
+ },
+ {
+ # YouTube Red video with episode data
+ 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
+ 'info_dict': {
+ 'id': 'iqKdEhx-dD4',
+ 'ext': 'mp4',
+ 'title': 'Isolation - Mind Field (Ep 1)',
+ 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
+ 'duration': 2085,
+ 'upload_date': '20170118',
+ 'uploader': 'Vsauce',
+ 'uploader_id': 'Vsauce',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
+ 'series': 'Mind Field',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'Skipping DASH manifest',
+ ],
+ },
+ {
+ # The following content has been identified by the YouTube community
+ # as inappropriate or offensive to some audiences.
+ 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
+ 'info_dict': {
+ 'id': '6SJNVb0GnPI',
+ 'ext': 'mp4',
+ 'title': 'Race Differences in Intelligence',
+ 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
+ 'duration': 965,
+ 'upload_date': '20140124',
+ 'uploader': 'New Century Foundation',
+ 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
+ },
+ {
+ # itag 212
+ 'url': '1t24XAntNCY',
+ 'only_matching': True,
+ },
+ {
+ # geo restricted to JP
+ 'url': 'sJL6WA-aGkQ',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
+ 'only_matching': True,
+ },
+ {
+ # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
+ 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
+ 'only_matching': True,
+ },
+ {
+ # DRM protected
+ 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
+ 'only_matching': True,
+ },
+ {
+ # Video with unsupported adaptive stream type formats
+ 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
+ 'info_dict': {
+ 'id': 'Z4Vy8R84T1U',
+ 'ext': 'mp4',
+ 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'duration': 433,
+ 'upload_date': '20130923',
+ 'uploader': 'Amelia Putri Harwita',
+ 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
+ 'formats': 'maxcount:10',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'youtube_include_dash_manifest': False,
+ },
+ 'skip': 'not actual anymore',
+ },
+ {
+ # Youtube Music Auto-generated description
+ 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
+ 'info_dict': {
+ 'id': 'MgNrAu2pzNs',
+ 'ext': 'mp4',
+ 'title': 'Voyeur Girl',
+ 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
+ 'upload_date': '20190312',
+ 'uploader': 'Stephen - Topic',
+ 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
+ 'artist': 'Stephen',
+ 'track': 'Voyeur Girl',
+ 'album': 'it\'s too much love to know my dear',
+ 'release_date': '20190313',
+ 'release_year': 2019,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
+ 'only_matching': True,
+ },
+ {
+ # invalid -> valid video id redirection
+ 'url': 'DJztXj2GPfl',
+ 'info_dict': {
+ 'id': 'DJztXj2GPfk',
+ 'ext': 'mp4',
+ 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
+ 'description': 'md5:bf577a41da97918e94fa9798d9228825',
+ 'upload_date': '20090125',
+ 'uploader': 'Prochorowka',
+ 'uploader_id': 'Prochorowka',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
+ 'artist': 'Panjabi MC',
+ 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
+ 'album': 'Beware of the Boys (Mundian To Bach Ke)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Video unavailable',
+ },
+ {
+ # empty description results in an empty string
+ 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
+ 'info_dict': {
+ 'id': 'x41yOUIvK2k',
+ 'ext': 'mp4',
+ 'title': 'IMG 3456',
+ 'description': '',
+ 'upload_date': '20170613',
+ 'uploader_id': 'ElevageOrVert',
+ 'uploader': 'ElevageOrVert',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # with '};' inside yt initial data (see [1])
+ # see [2] for an example with '};' inside ytInitialPlayerResponse
+ # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
+ # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
+ 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
+ 'info_dict': {
+ 'id': 'CHqg6qOn4no',
+ 'ext': 'mp4',
+ 'title': 'Part 77 Sort a list of simple types in c#',
+ 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
+ 'upload_date': '20130831',
+ 'uploader_id': 'kudvenkat',
+ 'uploader': 'kudvenkat',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # another example of '};' in ytInitialData
+ 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
+ 'only_matching': True,
+ },
+ {
+ # https://github.com/ytdl-org/youtube-dl/pull/28094
+ 'url': 'OtqTfy26tG0',
+ 'info_dict': {
+ 'id': 'OtqTfy26tG0',
+ 'ext': 'mp4',
+ 'title': 'Burn Out',
+ 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
+ 'upload_date': '20141120',
+ 'uploader': 'The Cinematic Orchestra - Topic',
+ 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
+ 'artist': 'The Cinematic Orchestra',
+ 'track': 'Burn Out',
+ 'album': 'Every Day',
+ 'release_date': None,
+ 'release_year': None,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # controversial video, only works with bpctr when authenticated with cookies
+ 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
+ 'only_matching': True,
+ },
+ {
+ # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
+ 'url': 'cBvYw8_A0vQ',
+ 'info_dict': {
+ 'id': 'cBvYw8_A0vQ',
+ 'ext': 'mp4',
+ 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
+ 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
+ 'upload_date': '20201120',
+ 'uploader': 'Walk around Japan',
+ 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ ]
+ _formats = {
+ '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
+ '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
+ '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
+ '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
+ '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
+ '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
+ '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
+ '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
+ '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
+ '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+
+
+ # 3D videos
+ '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
+ '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
+ '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
+ '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
+ '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
+ '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
+ '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
+
+ # Apple HTTP Live Streaming
+ '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
+ '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
+ '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
+ '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
+ '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
+
+ # DASH mp4 video
+ '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
+ '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
+ '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
+ '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
+
+ # Dash mp4 audio
+ '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
+ '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
+ '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
+ '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
+ '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
+ '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
+ '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
+
+ # Dash webm
+ '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
+ '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
+ '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+
+ # Dash webm audio
+ '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
+ '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
+
+ # Dash webm audio with opus inside
+ '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
+ '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
+ '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
+
+ # RTMP (unnamed)
+ '_rtmp': {'protocol': 'rtmp'},
+
+ # av01 video only formats sometimes served with "unknown" codecs
+ '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
+ '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
+ '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
+ '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ # Hack for lazy extractors until a more generic solution is implemented
+ # (see #28780)
+ from .youtube import parse_qs
+ qs = parse_qs(url)
+ if qs.get('list', [None])[0]:
+ return False
+ return super(YoutubeIE, cls).suitable(url)
+
+ def __init__(self, *args, **kwargs):
+ super(YoutubeIE, self).__init__(*args, **kwargs)
+ self._code_cache = {}
+ self._player_cache = {}
+
+ def _signature_cache_id(self, example_sig):
+ """ Return a string representation of a signature """
+ return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
+
+ @classmethod
+ def _extract_player_info(cls, player_url):
+ for player_re in cls._PLAYER_INFO_RE:
+ id_m = re.search(player_re, player_url)
+ if id_m:
+ break
+ else:
+ raise ExtractorError('Cannot identify player %r' % player_url)
+ return id_m.group('id')
+
+ def _extract_signature_function(self, video_id, player_url, example_sig):
+ player_id = self._extract_player_info(player_url)
+
+ # Read from filesystem cache
+ func_id = 'js_%s_%s' % (
+ player_id, self._signature_cache_id(example_sig))
+ assert os.path.basename(func_id) == func_id
+
+ cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
+ if cache_spec is not None:
+ return lambda s: ''.join(s[i] for i in cache_spec)
+
+ if player_id not in self._code_cache:
+ self._code_cache[player_id] = self._download_webpage(
+ player_url, video_id,
+ note='Downloading player ' + player_id,
+ errnote='Download of %s failed' % player_url)
+ code = self._code_cache[player_id]
+ res = self._parse_sig_js(code)
+
+ test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ cache_res = res(test_string)
+ cache_spec = [ord(c) for c in cache_res]
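+ # (illustrative) cache_spec records, for each output position, which
+ # input index the player function moved there: running it over
+ # chr(0)..chr(n-1) and taking ord() of each result char recovers
+ # that mapping directly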
+
+ self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
+ return res
+
+ def _print_sig_code(self, func, example_sig):
+ def gen_sig_code(idxs):
+ def _genslice(start, end, step):
+ starts = '' if start == 0 else str(start)
+ ends = (':%d' % (end + step)) if end + step >= 0 else ':'
+ steps = '' if step == 1 else (':%d' % step)
+ return 's[%s%s%s]' % (starts, ends, steps)
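+ # e.g. (illustrative) _genslice(2, 5, 1) -> 's[2:6]' and
+ # _genslice(5, 1, -1) -> 's[5:0:-1]'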
+
+ step = None
+ # Quell pyflakes warnings - start will be set when step is set
+ start = '(Never used)'
+ for i, prev in zip(idxs[1:], idxs[:-1]):
+ if step is not None:
+ if i - prev == step:
+ continue
+ yield _genslice(start, prev, step)
+ step = None
+ continue
+ if i - prev in [-1, 1]:
+ step = i - prev
+ start = prev
+ continue
+ else:
+ yield 's[%d]' % prev
+ if step is None:
+ yield 's[%d]' % i
+ else:
+ yield _genslice(start, i, step)
+
+ test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ cache_res = func(test_string)
+ cache_spec = [ord(c) for c in cache_res]
+ expr_code = ' + '.join(gen_sig_code(cache_spec))
+ signature_id_tuple = '(%s)' % (
+ ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
+ code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
+ ' return %s\n') % (signature_id_tuple, expr_code)
+ self.to_screen('Extracted signature function:\n' + code)
+
+ def _parse_sig_js(self, jscode):
+ funcname = self._search_regex(
+ (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
+ r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
+ r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
+ r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+ r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+ # Obsolete patterns
+ r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
+ jscode, 'Initial JS player signature function name', group='sig')
+
+ jsi = JSInterpreter(jscode)
+ initial_function = jsi.extract_function(funcname)
+ return lambda s: initial_function([s])
+
+ def _decrypt_signature(self, s, video_id, player_url):
+ """Turn the encrypted s field into a working signature"""
+
+ if player_url is None:
+ raise ExtractorError('Cannot decrypt signature without player_url')
+
+ if player_url.startswith('//'):
+ player_url = 'https:' + player_url
+ elif not re.match(r'https?://', player_url):
+ player_url = compat_urlparse.urljoin(
+ 'https://www.youtube.com', player_url)
+ try:
+ player_id = (player_url, self._signature_cache_id(s))
+ if player_id not in self._player_cache:
+ func = self._extract_signature_function(
+ video_id, player_url, s
+ )
+ self._player_cache[player_id] = func
+ func = self._player_cache[player_id]
+ if self._downloader.params.get('youtube_print_sig_code'):
+ self._print_sig_code(func, s)
+ return func(s)
+ except Exception as e:
+ tb = traceback.format_exc()
+ raise ExtractorError(
+ 'Signature extraction failed: ' + tb, cause=e)
+
+ def _mark_watched(self, video_id, player_response):
+ playback_url = url_or_none(try_get(
+ player_response,
+ lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
+ if not playback_url:
+ return
+ parsed_playback_url = compat_urlparse.urlparse(playback_url)
+ qs = compat_urlparse.parse_qs(parsed_playback_url.query)
+
+ # The cpn generation algorithm is reverse engineered from base.js.
+ # In fact it works even with a dummy cpn.
+ CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
+ cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
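+ # (illustrative) this yields a 16-character base64url string such as
+ # 'bPTSTwZ-DeyAkFGj'; per the note above, even a dummy value works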
+
+ qs.update({
+ 'ver': ['2'],
+ 'cpn': [cpn],
+ })
+ playback_url = compat_urlparse.urlunparse(
+ parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
+ self._download_webpage(
+ playback_url, video_id, 'Marking watched',
+ 'Unable to mark watched', fatal=False)
+
+ @staticmethod
+ def _extract_urls(webpage):
+ # Embedded YouTube player
+ entries = [
+ unescapeHTML(mobj.group('url'))
+ for mobj in re.finditer(r'''(?x)
+ (?:
+ <iframe[^>]+?src=|
+ data-video-url=|
+ <embed[^>]+?src=|
+ embedSWF\(?:\s*|
+ <object[^>]+data=|
+ new\s+SWFObject\(
+ )
+ (["\'])
+ (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+ (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
+ \1''', webpage)]
+
+ # lazyYT YouTube embed
+ entries.extend(list(map(
+ unescapeHTML,
+ re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
+
+ # Wordpress "YouTube Video Importer" plugin
+ matches = re.findall(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+ data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
+ entries.extend(m[-1] for m in matches)
+
+ return entries
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = YoutubeIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @classmethod
+ def extract_id(cls, url):
+ mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
+ if mobj is None:
+ raise ExtractorError('Invalid URL: %s' % url)
+ video_id = mobj.group(2)
+ return video_id
+
+ def _extract_chapters_from_json(self, data, video_id, duration):
+ chapters_list = try_get(
+ data,
+ lambda x: x['playerOverlays']
+ ['playerOverlayRenderer']
+ ['decoratedPlayerBarRenderer']
+ ['decoratedPlayerBarRenderer']
+ ['playerBar']
+ ['chapteredPlayerBarRenderer']
+ ['chapters'],
+ list)
+ if not chapters_list:
+ return
+
+ def chapter_time(chapter):
+ return float_or_none(
+ try_get(
+ chapter,
+ lambda x: x['chapterRenderer']['timeRangeStartMillis'],
+ int),
+ scale=1000)
+ chapters = []
+ for next_num, chapter in enumerate(chapters_list, start=1):
+ start_time = chapter_time(chapter)
+ if start_time is None:
+ continue
+ end_time = (chapter_time(chapters_list[next_num])
+ if next_num < len(chapters_list) else duration)
+ if end_time is None:
+ continue
+ title = try_get(
+ chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
+ compat_str)
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ 'title': title,
+ })
+ return chapters
+
+ def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
+ return self._parse_json(self._search_regex(
+ (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
+ regex), webpage, name, default='{}'), video_id, fatal=False)
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ video_id = self._match_id(url)
+ base_url = self.http_scheme() + '//www.youtube.com/'
+ webpage_url = base_url + 'watch?v=' + video_id
+ webpage = self._download_webpage(
+ webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
+
+ player_response = None
+ if webpage:
+ player_response = self._extract_yt_initial_variable(
+ webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
+ video_id, 'initial player response')
+ if not player_response:
+ player_response = self._call_api(
+ 'player', {'videoId': video_id}, video_id)
+
+ playability_status = player_response.get('playabilityStatus') or {}
+ if playability_status.get('reason') == 'Sign in to confirm your age':
+ pr = self._parse_json(try_get(compat_parse_qs(
+ self._download_webpage(
+ base_url + 'get_video_info', video_id,
+ 'Refetching age-gated info webpage',
+ 'unable to download video info webpage', query={
+ 'video_id': video_id,
+ 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
+ 'html5': 1,
+ }, fatal=False)),
+ lambda x: x['player_response'][0],
+ compat_str) or '{}', video_id)
+ if pr:
+ player_response = pr
+
+ trailer_video_id = try_get(
+ playability_status,
+ lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
+ compat_str)
+ if trailer_video_id:
+ return self.url_result(
+ trailer_video_id, self.ie_key(), trailer_video_id)
+
+ def get_text(x):
+ if not x:
+ return
+ text = x.get('simpleText')
+ if text and isinstance(text, compat_str):
+ return text
+ runs = x.get('runs')
+ if not isinstance(runs, list):
+ return
+ return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
+
+ search_meta = (
+ lambda x: self._html_search_meta(x, webpage, default=None)) \
+ if webpage else lambda x: None
+
+ video_details = player_response.get('videoDetails') or {}
+ microformat = try_get(
+ player_response,
+ lambda x: x['microformat']['playerMicroformatRenderer'],
+ dict) or {}
+ video_title = video_details.get('title') \
+ or get_text(microformat.get('title')) \
+ or search_meta(['og:title', 'twitter:title', 'title'])
+ video_description = video_details.get('shortDescription')
+
+ if not smuggled_data.get('force_singlefeed', False):
+ if not self._downloader.params.get('noplaylist'):
+ multifeed_metadata_list = try_get(
+ player_response,
+ lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
+ compat_str)
+ if multifeed_metadata_list:
+ entries = []
+ feed_ids = []
+ for feed in multifeed_metadata_list.split(','):
+ # Unquoting must happen after the split on comma (,) since textual
+ # fields may themselves contain commas, which are still
+ # percent-encoded at this point (see
+ # https://github.com/ytdl-org/youtube-dl/issues/8536)
+ feed_data = compat_parse_qs(
+ compat_urllib_parse_unquote_plus(feed))
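+ # (illustrative) a single feed entry decodes to something like
+ # 'id=abc123&title=Camera 2, wide angle' - the embedded comma
+ # only appears once the entry has been unquoted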
+
+ def feed_entry(name):
+ return try_get(
+ feed_data, lambda x: x[name][0], compat_str)
+
+ feed_id = feed_entry('id')
+ if not feed_id:
+ continue
+ feed_title = feed_entry('title')
+ title = video_title
+ if feed_title:
+ title += ' (%s)' % feed_title
+ entries.append({
+ '_type': 'url_transparent',
+ 'ie_key': 'Youtube',
+ 'url': smuggle_url(
+ base_url + 'watch?v=' + feed_data['id'][0],
+ {'force_singlefeed': True}),
+ 'title': title,
+ })
+ feed_ids.append(feed_id)
+ self.to_screen(
+ 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
+ % (', '.join(feed_ids), video_id))
+ return self.playlist_result(
+ entries, video_id, video_title, video_description)
+ else:
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
+ formats = []
+ itags = []
+ itag_qualities = {}
+ player_url = None
+ q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
+ streaming_data = player_response.get('streamingData') or {}
+ streaming_formats = streaming_data.get('formats') or []
+ streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
+ for fmt in streaming_formats:
+ if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
+ continue
+
+ itag = str_or_none(fmt.get('itag'))
+ quality = fmt.get('quality')
+ if itag and quality:
+ itag_qualities[itag] = quality
+ # FORMAT_STREAM_TYPE_OTF (otf=1) requires downloading the init fragment
+ # (adding `&sq=0` to the URL) and parsing the emsg box to determine the
+ # number of fragments that would subsequently be requested with `&sq=N`
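+ # (illustrative) the fragment URLs would then be <url>&sq=0,
+ # <url>&sq=1, ... up to the count parsed from the emsg box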
+ if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
+ continue
+
+ fmt_url = fmt.get('url')
+ if not fmt_url:
+ sc = compat_parse_qs(fmt.get('signatureCipher'))
+ fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
+ encrypted_sig = try_get(sc, lambda x: x['s'][0])
+ if not (sc and fmt_url and encrypted_sig):
+ continue
+ if not player_url:
+ if not webpage:
+ continue
+ player_url = self._search_regex(
+ r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
+ webpage, 'player URL', fatal=False)
+ if not player_url:
+ continue
+ signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
+ sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
+ fmt_url += '&' + sp + '=' + signature
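+ # (illustrative) fmt_url now ends in e.g. '&sig=<decrypted>', or
+ # '&signature=<decrypted>' when the cipher carries no 'sp' field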
+
+ if itag:
+ itags.append(itag)
+ tbr = float_or_none(
+ fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
+ dct = {
+ 'asr': int_or_none(fmt.get('audioSampleRate')),
+ 'filesize': int_or_none(fmt.get('contentLength')),
+ 'format_id': itag,
+ 'format_note': fmt.get('qualityLabel') or quality,
+ 'fps': int_or_none(fmt.get('fps')),
+ 'height': int_or_none(fmt.get('height')),
+ 'quality': q(quality),
+ 'tbr': tbr,
+ 'url': fmt_url,
+ 'width': fmt.get('width'),
+ }
+ mimetype = fmt.get('mimeType')
+ if mimetype:
+ mobj = re.match(
+ r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
+ if mobj:
+ dct['ext'] = mimetype2ext(mobj.group(1))
+ dct.update(parse_codecs(mobj.group(2)))
+ no_audio = dct.get('acodec') == 'none'
+ no_video = dct.get('vcodec') == 'none'
+ if no_audio:
+ dct['vbr'] = tbr
+ if no_video:
+ dct['abr'] = tbr
+ if no_audio or no_video:
+ dct['downloader_options'] = {
+ # YouTube throttles chunks larger than ~10 MiB (10485760 bytes)
+ 'http_chunk_size': 10485760,
+ }
+ if dct.get('ext'):
+ dct['container'] = dct['ext'] + '_dash'
+ formats.append(dct)
+
+ hls_manifest_url = streaming_data.get('hlsManifestUrl')
+ if hls_manifest_url:
+ for f in self._extract_m3u8_formats(
+ hls_manifest_url, video_id, 'mp4', fatal=False):
+ itag = self._search_regex(
+ r'/itag/(\d+)', f['url'], 'itag', default=None)
+ if itag:
+ f['format_id'] = itag
+ formats.append(f)
+
+ if self._downloader.params.get('youtube_include_dash_manifest', True):
+ dash_manifest_url = streaming_data.get('dashManifestUrl')
+ if dash_manifest_url:
+ for f in self._extract_mpd_formats(
+ dash_manifest_url, video_id, fatal=False):
+ itag = f['format_id']
+ if itag in itags:
+ continue
+ if itag in itag_qualities:
+ f['quality'] = q(itag_qualities[itag])
+ filesize = int_or_none(self._search_regex(
+ r'/clen/(\d+)', f.get('fragment_base_url')
+ or f['url'], 'file size', default=None))
+ if filesize:
+ f['filesize'] = filesize
+ formats.append(f)
+
+ if not formats:
+ if streaming_data.get('licenseInfos'):
+ raise ExtractorError(
+ 'This video is DRM protected.', expected=True)
+ pemr = try_get(
+ playability_status,
+ lambda x: x['errorScreen']['playerErrorMessageRenderer'],
+ dict) or {}
+ reason = get_text(pemr.get('reason')) or playability_status.get('reason')
+ subreason = pemr.get('subreason')
+ if subreason:
+ subreason = clean_html(get_text(subreason))
+ if subreason == 'The uploader has not made this video available in your country.':
+ countries = microformat.get('availableCountries')
+ if not countries:
+ regions_allowed = search_meta('regionsAllowed')
+ countries = regions_allowed.split(',') if regions_allowed else None
+ self.raise_geo_restricted(
+ subreason, countries)
+ reason += '\n' + subreason
+ if reason:
+ raise ExtractorError(reason, expected=True)
+
+ self._sort_formats(formats)
+
+ keywords = video_details.get('keywords') or []
+ if not keywords and webpage:
+ keywords = [
+ unescapeHTML(m.group('content'))
+ for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
+ for keyword in keywords:
+ if keyword.startswith('yt:stretch='):
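+ # (illustrative) a keyword like 'yt:stretch=16:9' yields w=16.0,
+ # h=9.0 and a stretched_ratio of ~1.78 on every video format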
+ mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
+ if mobj:
+ # NB: float is intentional for forcing float division
+ w, h = (float(v) for v in mobj.groups())
+ if w > 0 and h > 0:
+ ratio = w / h
+ for f in formats:
+ if f.get('vcodec') != 'none':
+ f['stretched_ratio'] = ratio
+ break
+
+ thumbnails = []
+ for container in (video_details, microformat):
+ for thumbnail in (try_get(
+ container,
+ lambda x: x['thumbnail']['thumbnails'], list) or []):
+ thumbnail_url = thumbnail.get('url')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'height': int_or_none(thumbnail.get('height')),
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ })
+ if thumbnails:
+ break
+ else:
+ thumbnail = search_meta(['og:image', 'twitter:image'])
+ if thumbnail:
+ thumbnails = [{'url': thumbnail}]
+
+ category = microformat.get('category') or search_meta('genre')
+ channel_id = video_details.get('channelId') \
+ or microformat.get('externalChannelId') \
+ or search_meta('channelId')
+ duration = int_or_none(
+ video_details.get('lengthSeconds')
+ or microformat.get('lengthSeconds')) \
+ or parse_duration(search_meta('duration'))
+ is_live = video_details.get('isLive')
+ owner_profile_url = microformat.get('ownerProfileUrl')
+
+ info = {
+ 'id': video_id,
+ 'title': self._live_title(video_title) if is_live else video_title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': video_description,
+ 'upload_date': unified_strdate(
+ microformat.get('uploadDate')
+ or search_meta('uploadDate')),
+ 'uploader': video_details['author'],
+ 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
+ 'uploader_url': owner_profile_url,
+ 'channel_id': channel_id,
+ 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
+ 'duration': duration,
+ 'view_count': int_or_none(
+ video_details.get('viewCount')
+ or microformat.get('viewCount')
+ or search_meta('interactionCount')),
+ 'average_rating': float_or_none(video_details.get('averageRating')),
+ 'age_limit': 18 if (
+ microformat.get('isFamilySafe') is False
+ or search_meta('isFamilyFriendly') == 'false'
+ or search_meta('og:restrictions:age') == '18+') else 0,
+ 'webpage_url': webpage_url,
+ 'categories': [category] if category else None,
+ 'tags': keywords,
+ 'is_live': is_live,
+ }
+
+ pctr = try_get(
+ player_response,
+ lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
+ if pctr:
+ def process_language(container, base_url, lang_code, query):
+ lang_subs = []
+ for fmt in self._SUBTITLE_FORMATS:
+ query.update({
+ 'fmt': fmt,
+ })
+ lang_subs.append({
+ 'ext': fmt,
+ 'url': update_url_query(base_url, query),
+ })
+ container[lang_code] = lang_subs
+
+ subtitles = {}
+ for caption_track in (pctr.get('captionTracks') or []):
+ base_url = caption_track.get('baseUrl')
+ if not base_url:
+ continue
+ if caption_track.get('kind') != 'asr':
+ lang_code = caption_track.get('languageCode')
+ if not lang_code:
+ continue
+ process_language(
+ subtitles, base_url, lang_code, {})
+ continue
+ automatic_captions = {}
+ for translation_language in (pctr.get('translationLanguages') or []):
+ translation_language_code = translation_language.get('languageCode')
+ if not translation_language_code:
+ continue
+ process_language(
+ automatic_captions, base_url, translation_language_code,
+ {'tlang': translation_language_code})
+ info['automatic_captions'] = automatic_captions
+ info['subtitles'] = subtitles
+
+ parsed_url = compat_urllib_parse_urlparse(url)
+ for component in [parsed_url.fragment, parsed_url.query]:
+ query = compat_parse_qs(component)
+ for k, v in query.items():
+ for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
+ d_k += '_time'
+ if d_k not in info and k in s_ks:
+ info[d_k] = parse_duration(query[k][0])
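+ # (illustrative) a URL like '...watch?v=XXXXXXXXXXX&t=1m30s' (or a
+ # '#t=90' fragment) sets info['start_time'] to 90.0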
+
+ if video_description:
+ mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
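+ # (illustrative) matches auto-generated descriptions of the form:
+ # Track Title · Artist Name
+ # Album Name
+ # ℗ 2020 Some Label
+ # Released on: 2020-01-01
+ # ...
+ # Auto-generated by YouTube.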
+ if mobj:
+ release_year = mobj.group('release_year')
+ release_date = mobj.group('release_date')
+ if release_date:
+ release_date = release_date.replace('-', '')
+ if not release_year:
+ release_year = release_date[:4]
+ info.update({
+ 'album': mobj.group('album').strip(),
+ 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
+ 'track': mobj.group('track').strip(),
+ 'release_date': release_date,
+ 'release_year': int_or_none(release_year),
+ })
+
+ initial_data = None
+ if webpage:
+ initial_data = self._extract_yt_initial_variable(
+ webpage, self._YT_INITIAL_DATA_RE, video_id,
+ 'yt initial data')
+ if not initial_data:
+ initial_data = self._call_api(
+ 'next', {'videoId': video_id}, video_id, fatal=False)
+
+ if initial_data:
+ chapters = self._extract_chapters_from_json(
+ initial_data, video_id, duration)
+ if not chapters:
+ for engagement_panel in (initial_data.get('engagementPanels') or []):
+ contents = try_get(
+ engagement_panel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
+ list)
+ if not contents:
+ continue
+
+ def chapter_time(mmlir):
+ return parse_duration(
+ get_text(mmlir.get('timeDescription')))
+
+ chapters = []
+ for next_num, content in enumerate(contents, start=1):
+ mmlir = content.get('macroMarkersListItemRenderer') or {}
+ start_time = chapter_time(mmlir)
+ end_time = chapter_time(try_get(
+ contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
+ if next_num < len(contents) else duration
+ if start_time is None or end_time is None:
+ continue
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ 'title': get_text(mmlir.get('title')),
+ })
+ if chapters:
+ break
+ if chapters:
+ info['chapters'] = chapters
+
+ contents = try_get(
+ initial_data,
+ lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
+ list) or []
+ for content in contents:
+ vpir = content.get('videoPrimaryInfoRenderer')
+ if vpir:
+ stl = vpir.get('superTitleLink')
+ if stl:
+ stl = get_text(stl)
+ if try_get(
+ vpir,
+ lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
+ info['location'] = stl
+ else:
+ mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
+ if mobj:
+ info.update({
+ 'series': mobj.group(1),
+ 'season_number': int(mobj.group(2)),
+ 'episode_number': int(mobj.group(3)),
+ })
+ for tlb in (try_get(
+ vpir,
+ lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
+ list) or []):
+ tbr = tlb.get('toggleButtonRenderer') or {}
+ for getter, regex in [(
+ lambda x: x['defaultText']['accessibility']['accessibilityData'],
+ r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
+ lambda x: x['accessibility'],
+ lambda x: x['accessibilityData']['accessibilityData'],
+ ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
+ label = (try_get(tbr, getter, dict) or {}).get('label')
+ if label:
+ mobj = re.match(regex, label)
+ if mobj:
+ info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
+ break
+ sbr_tooltip = try_get(
+ vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
+ if sbr_tooltip:
+ like_count, dislike_count = sbr_tooltip.split(' / ')
+ info.update({
+ 'like_count': str_to_int(like_count),
+ 'dislike_count': str_to_int(dislike_count),
+ })
+ vsir = content.get('videoSecondaryInfoRenderer')
+ if vsir:
+ info['channel'] = get_text(try_get(
+ vsir,
+ lambda x: x['owner']['videoOwnerRenderer']['title'],
+ dict))
+ rows = try_get(
+ vsir,
+ lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
+ list) or []
+ multiple_songs = False
+ for row in rows:
+ if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
+ multiple_songs = True
+ break
+ for row in rows:
+ mrr = row.get('metadataRowRenderer') or {}
+ mrr_title = mrr.get('title')
+ if not mrr_title:
+ continue
+ mrr_title = get_text(mrr['title'])
+ mrr_contents_text = get_text(mrr['contents'][0])
+ if mrr_title == 'License':
+ info['license'] = mrr_contents_text
+ elif not multiple_songs:
+ if mrr_title == 'Album':
+ info['album'] = mrr_contents_text
+ elif mrr_title == 'Artist':
+ info['artist'] = mrr_contents_text
+ elif mrr_title == 'Song':
+ info['track'] = mrr_contents_text
+
+ for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
+ v = info.get(s_k)
+ if v:
+ info[d_k] = v
+
+ self.mark_watched(video_id, player_response)
+
+ return info
+
+
+class YoutubeTabIE(YoutubeBaseInfoExtractor):
+ IE_DESC = 'YouTube.com tab'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:\w+\.)?
+ (?:
+ youtube(?:kids)?\.com|
+ invidio\.us
+ )/
+ (?:
+ (?:channel|c|user|feed|hashtag)/|
+ (?:playlist|watch)\?.*?\blist=|
+ (?!(?:watch|embed|v|e)\b)
+ )
+ (?P<id>[^/?\#&]+)
+ '''
+ IE_NAME = 'youtube:tab'
+
+ _TESTS = [{
+ # playlists, multipage
+ 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
+ 'playlist_mincount': 94,
+ 'info_dict': {
+ 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
+ 'title': 'Игорь Клейнер - Playlists',
+ 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
+ },
+ }, {
+ # playlists, multipage, different order
+ 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
+ 'playlist_mincount': 94,
+ 'info_dict': {
+ 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
+ 'title': 'Игорь Клейнер - Playlists',
+ 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
+ },
+ }, {
+ # playlists, series
+ 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'UCYO_jab_esuFRV4b17AJtAw',
+ 'title': '3Blue1Brown - Playlists',
+ 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
+ },
+ }, {
+ # playlists, singlepage
+ 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
+ 'playlist_mincount': 4,
+ 'info_dict': {
+ 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
+ 'title': 'ThirstForScience - Playlists',
+ 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
+ }
+ }, {
+ 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
+ 'only_matching': True,
+ }, {
+ # basic, single video playlist
+ 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
+ 'info_dict': {
+ 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+ 'uploader': 'Sergey M.',
+ 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
+ 'title': 'hypervideo public playlist',
+ },
+ 'playlist_count': 1,
+ }, {
+ # empty playlist
+ 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
+ 'info_dict': {
+ 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+ 'uploader': 'Sergey M.',
+ 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
+ 'title': 'hypervideo empty playlist',
+ },
+ 'playlist_count': 0,
+ }, {
+ # Home tab
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
+ 'info_dict': {
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Home',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ # Videos tab
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
+ 'info_dict': {
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Videos',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ },
+ 'playlist_mincount': 975,
+ }, {
+ # Videos tab, sorted by popular
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
+ 'info_dict': {
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Videos',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ },
+ 'playlist_mincount': 199,
+ }, {
+ # Playlists tab
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
+ 'info_dict': {
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Playlists',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ },
+ 'playlist_mincount': 17,
+ }, {
+ # Community tab
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
+ 'info_dict': {
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Community',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ },
+ 'playlist_mincount': 18,
+ }, {
+ # Channels tab
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
+ 'info_dict': {
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Channels',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ },
+ 'playlist_mincount': 138,
+ }, {
+ 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
+ 'only_matching': True,
+ }, {
+ 'note': 'Playlist with deleted videos (#651). As a bonus, video #51 also appears twice in this list.',
+ 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+ 'info_dict': {
+ 'title': '29C3: Not my department',
+ 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+ 'uploader': 'Christiaan008',
+ 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
+ },
+ 'playlist_count': 96,
+ }, {
+ 'note': 'Large playlist',
+ 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
+ 'info_dict': {
+ 'title': 'Uploads from Cauchemar',
+ 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
+ 'uploader': 'Cauchemar',
+ 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
+ },
+ 'playlist_mincount': 1123,
+ }, {
+ # even larger playlist, 8832 videos
+ 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
+ 'only_matching': True,
+ }, {
+ 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
+ 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
+ 'info_dict': {
+ 'title': 'Uploads from Interstellar Movie',
+ 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
+ 'uploader': 'Interstellar Movie',
+ 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
+ },
+ 'playlist_mincount': 21,
+ }, {
+ # https://github.com/ytdl-org/youtube-dl/issues/21844
+ 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+ 'info_dict': {
+ 'title': 'Data Analysis with Dr Mike Pound',
+ 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+ 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
+ 'uploader': 'Computerphile',
+ },
+ 'playlist_mincount': 11,
+ }, {
+ 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
+ 'only_matching': True,
+ }, {
+ # Playlist URL that does not actually serve a playlist
+ 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
+ 'info_dict': {
+ 'id': 'FqZTN594JQw',
+ 'ext': 'webm',
+ 'title': "Smiley's People 01 detective, Adventure Series, Action",
+ 'uploader': 'STREEM',
+ 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
+ 'upload_date': '20150526',
+ 'license': 'Standard YouTube License',
+ 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
+ 'categories': ['People & Blogs'],
+ 'tags': list,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video is not available.',
+ 'add_ie': [YoutubeIE.ie_key()],
+ }, {
+ 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
+ 'info_dict': {
+ 'id': '9Auq9mYxFEE',
+ 'ext': 'mp4',
+ 'title': 'Watch Sky News live',
+ 'uploader': 'Sky News',
+ 'uploader_id': 'skynews',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
+ 'upload_date': '20191102',
+ 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
+ 'categories': ['News & Politics'],
+ 'tags': list,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
+ 'info_dict': {
+ 'id': 'a48o2S1cPoo',
+ 'ext': 'mp4',
+ 'title': 'The Young Turks - Live Main Show',
+ 'uploader': 'The Young Turks',
+ 'uploader_id': 'TheYoungTurks',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
+ 'upload_date': '20150715',
+ 'license': 'Standard YouTube License',
+ 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
+ 'categories': ['News & Politics'],
+ 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/feed/trending',
+ 'only_matching': True,
+ }, {
+ # needs auth
+ 'url': 'https://www.youtube.com/feed/library',
+ 'only_matching': True,
+ }, {
+ # needs auth
+ 'url': 'https://www.youtube.com/feed/history',
+ 'only_matching': True,
+ }, {
+ # needs auth
+ 'url': 'https://www.youtube.com/feed/subscriptions',
+ 'only_matching': True,
+ }, {
+ # needs auth
+ 'url': 'https://www.youtube.com/feed/watch_later',
+ 'only_matching': True,
+ }, {
+ # no longer available?
+ 'url': 'https://www.youtube.com/feed/recommended',
+ 'only_matching': True,
+ }, {
+ # inline playlist whose continuations do not always work
+ 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/course',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/zsecurity',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.youtube.com/NASAgovVideo/videos',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/TheYoungTurks/live',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/hashtag/cctv9',
+ 'info_dict': {
+ 'id': 'cctv9',
+ 'title': '#cctv9',
+ },
+ 'playlist_mincount': 350,
+ }, {
+ 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if YoutubeIE.suitable(url) else super(
+ YoutubeTabIE, cls).suitable(url)
+
+ def _extract_channel_id(self, webpage):
+ channel_id = self._html_search_meta(
+ 'channelId', webpage, 'channel id', default=None)
+ if channel_id:
+ return channel_id
+ channel_url = self._html_search_meta(
+ ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
+ 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
+ 'twitter:app:url:googleplay'), webpage, 'channel url')
+ return self._search_regex(
+ r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
+ channel_url, 'channel id')
+
+ @staticmethod
+ def _extract_grid_item_renderer(item):
+ assert isinstance(item, dict)
+ for key, renderer in item.items():
+ if not key.startswith('grid') or not key.endswith('Renderer'):
+ continue
+ if not isinstance(renderer, dict):
+ continue
+ return renderer
+
+ def _grid_entries(self, grid_renderer):
+ for item in grid_renderer['items']:
+ if not isinstance(item, dict):
+ continue
+ renderer = self._extract_grid_item_renderer(item)
+ if not isinstance(renderer, dict):
+ continue
+ title = try_get(
+ renderer, (lambda x: x['title']['runs'][0]['text'],
+ lambda x: x['title']['simpleText']), compat_str)
+ # playlist
+ playlist_id = renderer.get('playlistId')
+ if playlist_id:
+ yield self.url_result(
+ 'https://www.youtube.com/playlist?list=%s' % playlist_id,
+ ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
+ video_title=title)
+ continue
+ # video
+ video_id = renderer.get('videoId')
+ if video_id:
+ yield self._extract_video(renderer)
+ continue
+ # channel
+ channel_id = renderer.get('channelId')
+ if channel_id:
+ title = try_get(
+ renderer, lambda x: x['title']['simpleText'], compat_str)
+ yield self.url_result(
+ 'https://www.youtube.com/channel/%s' % channel_id,
+ ie=YoutubeTabIE.ie_key(), video_title=title)
+ continue
+ # generic endpoint URL support
+ ep_url = urljoin('https://www.youtube.com/', try_get(
+ renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
+ compat_str))
+ if ep_url:
+ for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
+ if ie.suitable(ep_url):
+ yield self.url_result(
+ ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
+ break
+
+ def _shelf_entries_from_content(self, shelf_renderer):
+ content = shelf_renderer.get('content')
+ if not isinstance(content, dict):
+ return
+ renderer = content.get('gridRenderer')
+ if renderer:
+ # TODO: add support for nested playlists so that each shelf is
+ # processed as a separate playlist
+ # TODO: this includes only the first N items
+ for entry in self._grid_entries(renderer):
+ yield entry
+ renderer = content.get('horizontalListRenderer')
+ if renderer:
+ # TODO
+ pass
+
+ def _shelf_entries(self, shelf_renderer, skip_channels=False):
+ ep = try_get(
+ shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
+ compat_str)
+ shelf_url = urljoin('https://www.youtube.com', ep)
+ if shelf_url:
+ # Skip links to other channels; note that checking for
+ # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
+ # will not work
+ if skip_channels and '/channels?' in shelf_url:
+ return
+ title = try_get(
+ shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
+ yield self.url_result(shelf_url, video_title=title)
+ # A shelf may not contain a shelf URL; fall back to extraction from content
+ for entry in self._shelf_entries_from_content(shelf_renderer):
+ yield entry
+
+ def _playlist_entries(self, video_list_renderer):
+ for content in video_list_renderer['contents']:
+ if not isinstance(content, dict):
+ continue
+ renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
+ if not isinstance(renderer, dict):
+ continue
+ video_id = renderer.get('videoId')
+ if not video_id:
+ continue
+ yield self._extract_video(renderer)
+
+ def _video_entry(self, video_renderer):
+ video_id = video_renderer.get('videoId')
+ if video_id:
+ return self._extract_video(video_renderer)
+
+ def _post_thread_entries(self, post_thread_renderer):
+ post_renderer = try_get(
+ post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
+ if not post_renderer:
+ return
+ # video attachment
+ video_renderer = try_get(
+ post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
+ video_id = None
+ if video_renderer:
+ video_id = video_renderer.get('videoId')
+ entry = self._video_entry(video_renderer)
+ if entry:
+ yield entry
+ # inline video links
+ runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
+ for run in runs:
+ if not isinstance(run, dict):
+ continue
+ ep_url = try_get(
+ run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
+ if not ep_url:
+ continue
+ if not YoutubeIE.suitable(ep_url):
+ continue
+ ep_video_id = YoutubeIE._match_id(ep_url)
+ if video_id == ep_video_id:
+ continue
+ yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
+
+ def _post_thread_continuation_entries(self, post_thread_continuation):
+ contents = post_thread_continuation.get('contents')
+ if not isinstance(contents, list):
+ return
+ for content in contents:
+ renderer = content.get('backstagePostThreadRenderer')
+ if not isinstance(renderer, dict):
+ continue
+ for entry in self._post_thread_entries(renderer):
+ yield entry
+
+ def _rich_grid_entries(self, contents):
+ for content in contents:
+ video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
+ if video_renderer:
+ entry = self._video_entry(video_renderer)
+ if entry:
+ yield entry
+
+ @staticmethod
+ def _build_continuation_query(continuation, ctp=None):
+ query = {
+ 'ctoken': continuation,
+ 'continuation': continuation,
+ }
+ if ctp:
+ query['itct'] = ctp
+ return query
+
+ @staticmethod
+ def _extract_next_continuation_data(renderer):
+ next_continuation = try_get(
+ renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
+ if not next_continuation:
+ return
+ continuation = next_continuation.get('continuation')
+ if not continuation:
+ return
+ ctp = next_continuation.get('clickTrackingParams')
+ return YoutubeTabIE._build_continuation_query(continuation, ctp)
+
+ @classmethod
+ def _extract_continuation(cls, renderer):
+ next_continuation = cls._extract_next_continuation_data(renderer)
+ if next_continuation:
+ return next_continuation
+ contents = []
+ for key in ('contents', 'items'):
+ contents.extend(try_get(renderer, lambda x: x[key], list) or [])
+ for content in contents:
+ if not isinstance(content, dict):
+ continue
+ continuation_ep = try_get(
+ content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
+ dict)
+ if not continuation_ep:
+ continue
+ continuation = try_get(
+ continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
+ if not continuation:
+ continue
+ ctp = continuation_ep.get('clickTrackingParams')
+ return YoutubeTabIE._build_continuation_query(continuation, ctp)
+
+ def _entries(self, tab, item_id, webpage):
+ tab_content = try_get(tab, lambda x: x['content'], dict)
+ if not tab_content:
+ return
+ slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
+ if slr_renderer:
+ is_channels_tab = tab.get('title') == 'Channels'
+ continuation = None
+ slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or []
+ for slr_content in slr_contents:
+ if not isinstance(slr_content, dict):
+ continue
+ is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
+ if not is_renderer:
+ continue
+ isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
+ for isr_content in isr_contents:
+ if not isinstance(isr_content, dict):
+ continue
+ renderer = isr_content.get('playlistVideoListRenderer')
+ if renderer:
+ for entry in self._playlist_entries(renderer):
+ yield entry
+ continuation = self._extract_continuation(renderer)
+ continue
+ renderer = isr_content.get('gridRenderer')
+ if renderer:
+ for entry in self._grid_entries(renderer):
+ yield entry
+ continuation = self._extract_continuation(renderer)
+ continue
+ renderer = isr_content.get('shelfRenderer')
+ if renderer:
+ for entry in self._shelf_entries(renderer, not is_channels_tab):
+ yield entry
+ continue
+ renderer = isr_content.get('backstagePostThreadRenderer')
+ if renderer:
+ for entry in self._post_thread_entries(renderer):
+ yield entry
+ continuation = self._extract_continuation(renderer)
+ continue
+ renderer = isr_content.get('videoRenderer')
+ if renderer:
+ entry = self._video_entry(renderer)
+ if entry:
+ yield entry
+
+ if not continuation:
+ continuation = self._extract_continuation(is_renderer)
+ if not continuation:
+ continuation = self._extract_continuation(slr_renderer)
+ else:
+ rich_grid_renderer = tab_content.get('richGridRenderer')
+ if not rich_grid_renderer:
+ return
+ for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
+ yield entry
+ continuation = self._extract_continuation(rich_grid_renderer)
+
+ ytcfg = self._extract_ytcfg(item_id, webpage)
+ client_version = try_get(
+ ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or '2.20210407.08.00'
+
+ headers = {
+ 'x-youtube-client-name': '1',
+ 'x-youtube-client-version': client_version,
+ 'content-type': 'application/json',
+ }
+
+ context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict) or {
+ 'client': {
+ 'clientName': 'WEB',
+ 'clientVersion': client_version,
+ }
+ }
+ visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
+
+ identity_token = self._extract_identity_token(ytcfg, webpage)
+ if identity_token:
+ headers['x-youtube-identity-token'] = identity_token
+
+ data = {
+ 'context': context,
+ }
+
+ for page_num in itertools.count(1):
+ if not continuation:
+ break
+ if visitor_data:
+ headers['x-goog-visitor-id'] = visitor_data
+ data['continuation'] = continuation['continuation']
+ # 'itct' is only present when click tracking params were extracted
+ if continuation.get('itct'):
+ data['clickTracking'] = {
+ 'clickTrackingParams': continuation['itct']
+ }
+ count = 0
+ retries = 3
+ while count <= retries:
+ try:
+ # Downloading a page may result in an intermittent 5xx HTTP
+ # error that is usually worked around with a retry
+ response = self._download_json(
+ 'https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ None, 'Downloading page %d%s' % (page_num, ' (retry #%d)' % count if count else ''),
+ headers=headers, data=json.dumps(data).encode('utf8'))
+ break
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
+ count += 1
+ if count <= retries:
+ continue
+ raise
+ if not response:
+ break
+
+ visitor_data = try_get(
+ response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
+
+ continuation_contents = try_get(
+ response, lambda x: x['continuationContents'], dict)
+ if continuation_contents:
+ continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
+ if continuation_renderer:
+ for entry in self._playlist_entries(continuation_renderer):
+ yield entry
+ continuation = self._extract_continuation(continuation_renderer)
+ continue
+ continuation_renderer = continuation_contents.get('gridContinuation')
+ if continuation_renderer:
+ for entry in self._grid_entries(continuation_renderer):
+ yield entry
+ continuation = self._extract_continuation(continuation_renderer)
+ continue
+ continuation_renderer = continuation_contents.get('itemSectionContinuation')
+ if continuation_renderer:
+ for entry in self._post_thread_continuation_entries(continuation_renderer):
+ yield entry
+ continuation = self._extract_continuation(continuation_renderer)
+ continue
+
+ on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
+ continuation_items = try_get(
+ on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
+ if continuation_items:
+ continuation_item = continuation_items[0]
+ if not isinstance(continuation_item, dict):
+ continue
+ renderer = self._extract_grid_item_renderer(continuation_item)
+ if renderer:
+ grid_renderer = {'items': continuation_items}
+ for entry in self._grid_entries(grid_renderer):
+ yield entry
+ continuation = self._extract_continuation(grid_renderer)
+ continue
+ renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
+ if renderer:
+ video_list_renderer = {'contents': continuation_items}
+ for entry in self._playlist_entries(video_list_renderer):
+ yield entry
+ continuation = self._extract_continuation(video_list_renderer)
+ continue
+ renderer = continuation_item.get('backstagePostThreadRenderer')
+ if renderer:
+ continuation_renderer = {'contents': continuation_items}
+ for entry in self._post_thread_continuation_entries(continuation_renderer):
+ yield entry
+ continuation = self._extract_continuation(continuation_renderer)
+ continue
+ renderer = continuation_item.get('richItemRenderer')
+ if renderer:
+ for entry in self._rich_grid_entries(continuation_items):
+ yield entry
+ continuation = self._extract_continuation({'contents': continuation_items})
+ continue
+
+ break
+
+ @staticmethod
+ def _extract_selected_tab(tabs):
+ for tab in tabs:
+ if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
+ return tab['tabRenderer']
+ else:
+ raise ExtractorError('Unable to find selected tab')
+
+ @staticmethod
+ def _extract_uploader(data):
+ uploader = {}
+ sidebar_renderer = try_get(
+ data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
+ if sidebar_renderer:
+ for item in sidebar_renderer:
+ if not isinstance(item, dict):
+ continue
+ renderer = item.get('playlistSidebarSecondaryInfoRenderer')
+ if not isinstance(renderer, dict):
+ continue
+ owner = try_get(
+ renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
+ if owner:
+ uploader['uploader'] = owner.get('text')
+ uploader['uploader_id'] = try_get(
+ owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
+ uploader['uploader_url'] = urljoin(
+ 'https://www.youtube.com/',
+ try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
+ return uploader
+
+ @staticmethod
+ def _extract_alert(data):
+ alerts = []
+ for alert in try_get(data, lambda x: x['alerts'], list) or []:
+ if not isinstance(alert, dict):
+ continue
+ alert_text = try_get(
+ alert, lambda x: x['alertRenderer']['text'], dict)
+ if not alert_text:
+ continue
+ text = try_get(
+ alert_text,
+ (lambda x: x['simpleText'], lambda x: x['runs'][0]['text']),
+ compat_str)
+ if text:
+ alerts.append(text)
+ return '\n'.join(alerts)
+
+ def _extract_from_tabs(self, item_id, webpage, data, tabs):
+ selected_tab = self._extract_selected_tab(tabs)
+ renderer = try_get(
+ data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
+ playlist_id = item_id
+ title = description = None
+ if renderer:
+ channel_title = renderer.get('title') or item_id
+ tab_title = selected_tab.get('title')
+ title = channel_title or item_id
+ if tab_title:
+ title += ' - %s' % tab_title
+ description = renderer.get('description')
+ playlist_id = renderer.get('externalId')
+ else:
+ renderer = try_get(
+ data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
+ if renderer:
+ title = renderer.get('title')
+ else:
+ renderer = try_get(
+ data, lambda x: x['header']['hashtagHeaderRenderer'], dict)
+ if renderer:
+ title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
+ playlist = self.playlist_result(
+ self._entries(selected_tab, item_id, webpage),
+ playlist_id=playlist_id, playlist_title=title,
+ playlist_description=description)
+ playlist.update(self._extract_uploader(data))
+ return playlist
+
+ def _extract_from_playlist(self, item_id, url, data, playlist):
+ title = playlist.get('title') or try_get(
+ data, lambda x: x['titleText']['simpleText'], compat_str)
+ playlist_id = playlist.get('playlistId') or item_id
+ # Inline playlist continuation does not always work on YouTube's side,
+ # so delegate to regular tab-based playlist URL processing whenever
+ # possible.
+ playlist_url = urljoin(url, try_get(
+ playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
+ compat_str))
+ if playlist_url and playlist_url != url:
+ return self.url_result(
+ playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
+ video_title=title)
+ return self.playlist_result(
+ self._playlist_entries(playlist), playlist_id=playlist_id,
+ playlist_title=title)
+
+ def _extract_identity_token(self, ytcfg, webpage):
+ if ytcfg:
+ token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
+ if token:
+ return token
+ return self._search_regex(
+ r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
+ 'identity token', default=None)
+
+ def _real_extract(self, url):
+ item_id = self._match_id(url)
+ url = compat_urlparse.urlunparse(
+ compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
+ # Handle both video/playlist URLs
+ qs = parse_qs(url)
+ video_id = qs.get('v', [None])[0]
+ playlist_id = qs.get('list', [None])[0]
+ if video_id and playlist_id:
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+ webpage = self._download_webpage(url, item_id)
+ data = self._extract_yt_initial_data(item_id, webpage)
+ tabs = try_get(
+ data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
+ if tabs:
+ return self._extract_from_tabs(item_id, webpage, data, tabs)
+ playlist = try_get(
+ data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
+ if playlist:
+ return self._extract_from_playlist(item_id, url, data, playlist)
+ # Fall back to video extraction if no playlist-like page is recognized.
+ # First check for the current video, then try the v attribute of the URL query.
+ video_id = try_get(
+ data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
+ compat_str) or video_id
+ if video_id:
+ return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
+ # Capture and output alerts
+ alert = self._extract_alert(data)
+ if alert:
+ raise ExtractorError(alert, expected=True)
+ # Failed to recognize
+ raise ExtractorError('Unable to recognize tab page')
+
+
+class YoutubePlaylistIE(InfoExtractor):
+ IE_DESC = 'YouTube.com playlists'
+ _VALID_URL = r'''(?x)(?:
+ (?:https?://)?
+ (?:\w+\.)?
+ (?:
+ (?:
+ youtube(?:kids)?\.com|
+ invidio\.us
+ )
+ /.*?\?.*?\blist=
+ )?
+ (?P<id>%(playlist_id)s)
+ )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
+ IE_NAME = 'youtube:playlist'
+ _TESTS = [{
+ 'note': 'issue #673',
+ 'url': 'PLBB231211A4F62143',
+ 'info_dict': {
+ 'title': '[OLD]Team Fortress 2 (Class-based LP)',
+ 'id': 'PLBB231211A4F62143',
+ 'uploader': 'Wickydoo',
+ 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
+ },
+ 'playlist_mincount': 29,
+ }, {
+ 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
+ 'info_dict': {
+ 'title': 'YDL_safe_search',
+ 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
+ },
+ 'playlist_count': 2,
+ 'skip': 'This playlist is private',
+ }, {
+ 'note': 'embedded',
+ 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+ 'playlist_count': 4,
+ 'info_dict': {
+ 'title': 'JODA15',
+ 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+ 'uploader': 'milan',
+ 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
+ }
+ }, {
+ 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ 'playlist_mincount': 982,
+ 'info_dict': {
+ 'title': '2018 Chinese New Singles (11/6 updated)',
+ 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ 'uploader': 'LBK',
+ 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
+ }
+ }, {
+ 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
+ 'only_matching': True,
+ }, {
+ # music album playlist
+ 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ if YoutubeTabIE.suitable(url):
+ return False
+ # Hack for lazy extractors until more generic solution is implemented
+ # (see #28780)
+ from .youtube import parse_qs
+ qs = parse_qs(url)
+ if qs.get('v', [None])[0]:
+ return False
+ return super(YoutubePlaylistIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ qs = parse_qs(url)
+ if not qs:
+ qs = {'list': playlist_id}
+ return self.url_result(
+ update_url_query('https://www.youtube.com/playlist', qs),
+ ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
+
+
+class YoutubeYtBeIE(InfoExtractor):
+ _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
+ _TESTS = [{
+ 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
+ 'info_dict': {
+ 'id': 'yeWKywCrFtk',
+ 'ext': 'mp4',
+ 'title': 'Small Scale Baler and Braiding Rugs',
+ 'uploader': 'Backus-Page House Museum',
+ 'uploader_id': 'backuspagemuseum',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
+ 'upload_date': '20161008',
+ 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
+ 'categories': ['Nonprofits & Activism'],
+ 'tags': list,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ playlist_id = mobj.group('playlist_id')
+ return self.url_result(
+ update_url_query('https://www.youtube.com/watch', {
+ 'v': video_id,
+ 'list': playlist_id,
+ 'feature': 'youtu.be',
+ }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
+
+
+class YoutubeYtUserIE(InfoExtractor):
+ _VALID_URL = r'ytuser:(?P<id>.+)'
+ _TESTS = [{
+ 'url': 'ytuser:phihag',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ return self.url_result(
+ 'https://www.youtube.com/user/%s' % user_id,
+ ie=YoutubeTabIE.ie_key(), video_id=user_id)
+
+
+class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
+ IE_NAME = 'youtube:favorites'
+ IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
+ _LOGIN_REQUIRED = True
+ _TESTS = [{
+ 'url': ':ytfav',
+ 'only_matching': True,
+ }, {
+ 'url': ':ytfavorites',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result(
+ 'https://www.youtube.com/playlist?list=LL',
+ ie=YoutubeTabIE.ie_key())
+
+
+class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
+ IE_DESC = 'YouTube.com searches'
+ # There doesn't appear to be a real limit; for example, searching for
+ # 'python' yields more than 8,000,000 results.
+ _MAX_RESULTS = float('inf')
+ IE_NAME = 'youtube:search'
+ _SEARCH_KEY = 'ytsearch'
+ _SEARCH_PARAMS = None
+ _TESTS = []
+
+ def _entries(self, query, n):
+ data = {
+ 'context': {
+ 'client': {
+ 'clientName': 'WEB',
+ 'clientVersion': '2.20201021.03.00',
+ }
+ },
+ 'query': query,
+ }
+ if self._SEARCH_PARAMS:
+ data['params'] = self._SEARCH_PARAMS
+ total = 0
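+ # The innertube API key below is YouTube's public web-client key,
+ # shipped with every page; it is not an account secret.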
+ for page_num in itertools.count(1):
+ search = self._download_json(
+ 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ video_id='query "%s"' % query,
+ note='Downloading page %s' % page_num,
+ errnote='Unable to download API page', fatal=False,
+ data=json.dumps(data).encode('utf8'),
+ headers={'content-type': 'application/json'})
+ if not search:
+ break
+ slr_contents = try_get(
+ search,
+ (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
+ lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
+ list)
+ if not slr_contents:
+ break
+ for slr_content in slr_contents:
+ isr_contents = try_get(
+ slr_content,
+ lambda x: x['itemSectionRenderer']['contents'],
+ list)
+ if not isr_contents:
+ continue
+ for content in isr_contents:
+ if not isinstance(content, dict):
+ continue
+ video = content.get('videoRenderer')
+ if not isinstance(video, dict):
+ continue
+ video_id = video.get('videoId')
+ if not video_id:
+ continue
+ yield self._extract_video(video)
+ total += 1
+ if total == n:
+ return
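+ # The last section-list item carries the continuation token for the next page.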
+ token = try_get(
+ slr_contents,
+ lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
+ compat_str)
+ if not token:
+ break
+ data['continuation'] = token
+
+ def _get_n_results(self, query, n):
+ """Get a specified number of results for a query"""
+ return self.playlist_result(self._entries(query, n), query)
+
+
+class YoutubeSearchDateIE(YoutubeSearchIE):
+ IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
+ _SEARCH_KEY = 'ytsearchdate'
+ IE_DESC = 'YouTube.com searches, newest videos first'
+ _SEARCH_PARAMS = 'CAI%3D'
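+ # 'CAI%3D' is the percent-encoded 'CAI=' protobuf search filter, which sorts results by upload date.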
+
+
+r"""
+class YoutubeSearchURLIE(YoutubeSearchIE):
+ IE_DESC = 'YouTube.com search URLs'
+ IE_NAME = 'youtube:search_url'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'title': 'hypervideo test video',
+ }
+ }, {
+ 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ query = compat_urllib_parse_unquote_plus(mobj.group('query'))
+ webpage = self._download_webpage(url, query)
+ return self.playlist_result(self._process_page(webpage), playlist_title=query)
+"""
+
+
+class YoutubeFeedsInfoExtractor(YoutubeTabIE):
+ """
+ Base class for feed extractors.
+ Subclasses must define the _FEED_NAME property.
+ """
+ _LOGIN_REQUIRED = True
+
+ @property
+ def IE_NAME(self):
+ return 'youtube:%s' % self._FEED_NAME
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ return self.url_result(
+ 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
+ ie=YoutubeTabIE.ie_key())
+
+
+class YoutubeWatchLaterIE(InfoExtractor):
+ IE_NAME = 'youtube:watchlater'
+ IE_DESC = 'YouTube watch later list, ":ytwatchlater" for short (requires authentication)'
+ _VALID_URL = r':ytwatchlater'
+ _TESTS = [{
+ 'url': ':ytwatchlater',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result(
+ 'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
+
+
+class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
+ _VALID_URL = r':ytrec(?:ommended)?'
+ _FEED_NAME = 'recommended'
+ _TESTS = [{
+ 'url': ':ytrec',
+ 'only_matching': True,
+ }, {
+ 'url': ':ytrecommended',
+ 'only_matching': True,
+ }]
+
+
+class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
+ _VALID_URL = r':ytsubs(?:criptions)?'
+ _FEED_NAME = 'subscriptions'
+ _TESTS = [{
+ 'url': ':ytsubs',
+ 'only_matching': True,
+ }, {
+ 'url': ':ytsubscriptions',
+ 'only_matching': True,
+ }]
+
+
+class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'YouTube watch history, ":ythistory" for short (requires authentication)'
+ _VALID_URL = r':ythistory'
+ _FEED_NAME = 'history'
+ _TESTS = [{
+ 'url': ':ythistory',
+ 'only_matching': True,
+ }]
+
+
+class YoutubeTruncatedURLIE(InfoExtractor):
+ IE_NAME = 'youtube:truncated_url'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'''(?x)
+ (?:https?://)?
+ (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
+ (?:watch\?(?:
+ feature=[a-z_]+|
+ annotation_id=annotation_[^&]+|
+ x-yt-cl=[0-9]+|
+ hl=[^&]*|
+ t=[0-9]+
+ )?
+ |
+ attribution_link\?a=[^&]+
+ )
+ $
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?feature=foo',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?hl=en-GB',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?t=2372',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ raise ExtractorError(
+ 'Did you forget to quote the URL? Remember that & is a meta '
+ 'character in most shells, so you want to put the URL in quotes, '
+ 'like hypervideo '
+ '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
+ 'or simply hypervideo BaW_jenozKc.',
+ expected=True)
+
+
+class YoutubeTruncatedIDIE(InfoExtractor):
+ IE_NAME = 'youtube:truncated_id'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ raise ExtractorError(
+ 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
+ expected=True)
diff --git a/hypervideo_dl/extractor/zapiks.py b/hypervideo_dl/extractor/zapiks.py
new file mode 100644
index 0000000..f6496f5
--- /dev/null
+++ b/hypervideo_dl/extractor/zapiks.py
@@ -0,0 +1,109 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+ xpath_with_ns,
+ xpath_text,
+ int_or_none,
+)
+
+
+class ZapiksIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
+ _TESTS = [
+ {
+ 'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
+ 'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
+ 'info_dict': {
+ 'id': '80798',
+ 'ext': 'mp4',
+ 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
+ 'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 528,
+ 'timestamp': 1359044972,
+ 'upload_date': '20130124',
+ 'view_count': int,
+ },
+ },
+ {
+ 'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.zapiks.fr/index.php?action=playerIframe&amp;media_id=118046&amp;width=640&amp;height=360&amp;autoStart=false&amp;language=fr',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage = self._download_webpage(url, display_id)
+
+ if not video_id:
+ video_id = self._search_regex(
+ r'data-media-id="(\d+)"', webpage, 'video id')
+
+ playlist = self._download_xml(
+ 'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id,
+ display_id)
+
+ NS_MAP = {
+ 'jwplayer': 'http://rss.jwpcdn.com/'
+ }
+
+ def ns(path):
+ return xpath_with_ns(path, NS_MAP)
+
+ item = playlist.find('./channel/item')
+
+ title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = xpath_text(
+ item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration', default=None))
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'uploadDate', webpage, 'upload date', default=None), ' ')
+
+ view_count = int_or_none(self._search_regex(
+ r'UserPlays:(\d+)', webpage, 'view count', default=None))
+ comment_count = int_or_none(self._search_regex(
+ r'UserComments:(\d+)', webpage, 'comment count', default=None))
+
+ formats = []
+ for source in item.findall(ns('./jwplayer:source')):
+ format_id = source.attrib['label']
+ f = {
+ 'url': source.attrib['file'],
+ 'format_id': format_id,
+ }
+ m = re.search(r'^(?P<height>\d+)[pP]', format_id)
+ if m:
+ f['height'] = int(m.group('height'))
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/zattoo.py b/hypervideo_dl/extractor/zattoo.py
new file mode 100644
index 0000000..6bac302
--- /dev/null
+++ b/hypervideo_dl/extractor/zattoo.py
@@ -0,0 +1,433 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from uuid import uuid4
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class ZattooPlatformBaseIE(InfoExtractor):
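+ # Session-specific hash returned at login; required by the cached channel/program API endpoints.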
+ _power_guide_hash = None
+
+ def _host_url(self):
+ return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST)
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if not username or not password:
+ self.raise_login_required(
+ 'A valid %s account is needed to access this media.'
+ % self._NETRC_MACHINE)
+
+ try:
+ data = self._download_json(
+ '%s/zapi/v2/account/login' % self._host_url(), None, 'Logging in',
+ data=urlencode_postdata({
+ 'login': username,
+ 'password': password,
+ 'remember': 'true',
+ }), headers={
+ 'Referer': '%s/login' % self._host_url(),
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ raise ExtractorError(
+ 'Unable to login: incorrect username and/or password',
+ expected=True)
+ raise
+
+ self._power_guide_hash = data['session']['power_guide_hash']
+
+ def _real_initialize(self):
+ webpage = self._download_webpage(
+ self._host_url(), None, 'Downloading app token')
+ app_token = self._html_search_regex(
+ r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
+ webpage, 'app token', group='token')
+ app_version = self._html_search_regex(
+ r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2')
+
+ # Will set up the appropriate cookies
+ self._request_webpage(
+ '%s/zapi/v2/session/hello' % self._host_url(), None,
+ 'Opening session', data=urlencode_postdata({
+ 'client_app_token': app_token,
+ 'uuid': compat_str(uuid4()),
+ 'lang': 'en',
+ 'app_version': app_version,
+ 'format': 'json',
+ }))
+
+ self._login()
+
+ def _extract_cid(self, video_id, channel_name):
+ channel_groups = self._download_json(
+ '%s/zapi/v2/cached/channels/%s' % (self._host_url(),
+ self._power_guide_hash),
+ video_id, 'Downloading channel list',
+ query={'details': False})['channel_groups']
+ channel_list = []
+ for chgrp in channel_groups:
+ channel_list.extend(chgrp['channels'])
+ try:
+ return next(
+ chan['cid'] for chan in channel_list
+ if chan.get('cid') and (
+ chan.get('display_alias') == channel_name
+ or chan.get('cid') == channel_name))
+ except StopIteration:
+ raise ExtractorError('Could not extract channel id')
+
+ def _extract_cid_and_video_info(self, video_id):
+ data = self._download_json(
+ '%s/zapi/v2/cached/program/power_details/%s' % (
+ self._host_url(), self._power_guide_hash),
+ video_id,
+ 'Downloading video information',
+ query={
+ 'program_ids': video_id,
+ 'complete': True,
+ })
+
+ p = data['programs'][0]
+ cid = p['cid']
+
+ info_dict = {
+ 'id': video_id,
+ 'title': p.get('t') or p['et'],
+ 'description': p.get('d'),
+ 'thumbnail': p.get('i_url'),
+ 'creator': p.get('channel_name'),
+ 'episode': p.get('et'),
+ 'episode_number': int_or_none(p.get('e_no')),
+ 'season_number': int_or_none(p.get('s_no')),
+ 'release_year': int_or_none(p.get('year')),
+ 'categories': try_get(p, lambda x: x['c'], list),
+ 'tags': try_get(p, lambda x: x['g'], list)
+ }
+
+ return cid, info_dict
+
+ def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
+ postdata_common = {
+ 'https_watch_urls': True,
+ }
+
+ if is_live:
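+ # Request a 10800 second (3 hour) timeshift window for live streams.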
+ postdata_common.update({'timeshift': 10800})
+ url = '%s/zapi/watch/live/%s' % (self._host_url(), cid)
+ elif record_id:
+ url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id)
+ else:
+ url = '%s/zapi/watch/recall/%s/%s' % (self._host_url(), cid, video_id)
+
+ formats = []
+ for stream_type in ('dash', 'hls', 'hls5', 'hds'):
+ postdata = postdata_common.copy()
+ postdata['stream_type'] = stream_type
+
+ data = self._download_json(
+ url, video_id, 'Downloading %s formats' % stream_type.upper(),
+ data=urlencode_postdata(postdata), fatal=False)
+ if not data:
+ continue
+
+ watch_urls = try_get(
+ data, lambda x: x['stream']['watch_urls'], list)
+ if not watch_urls:
+ continue
+
+ for watch in watch_urls:
+ if not isinstance(watch, dict):
+ continue
+ watch_url = url_or_none(watch.get('url'))
+ if not watch_url:
+ continue
+ format_id_list = [stream_type]
+ maxrate = watch.get('maxrate')
+ if maxrate:
+ format_id_list.append(compat_str(maxrate))
+ audio_channel = watch.get('audio_channel')
+ if audio_channel:
+ format_id_list.append(compat_str(audio_channel))
+ preference = 1 if audio_channel == 'A' else None
+ format_id = '-'.join(format_id_list)
+ if stream_type in ('dash', 'dash_widevine', 'dash_playready'):
+ this_formats = self._extract_mpd_formats(
+ watch_url, video_id, mpd_id=format_id, fatal=False)
+ elif stream_type in ('hls', 'hls5', 'hls5_fairplay'):
+ this_formats = self._extract_m3u8_formats(
+ watch_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id=format_id,
+ fatal=False)
+ elif stream_type == 'hds':
+ this_formats = self._extract_f4m_formats(
+ watch_url, video_id, f4m_id=format_id, fatal=False)
+ elif stream_type == 'smooth_playready':
+ this_formats = self._extract_ism_formats(
+ watch_url, video_id, ism_id=format_id, fatal=False)
+ else:
+ assert False
+ for this_format in this_formats:
+ this_format['preference'] = preference
+ formats.extend(this_formats)
+ self._sort_formats(formats)
+ return formats
+
+ def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):
+ if is_live:
+ cid = self._extract_cid(video_id, channel_name)
+ info_dict = {
+ 'id': channel_name,
+ 'title': self._live_title(channel_name),
+ 'is_live': True,
+ }
+ else:
+ cid, info_dict = self._extract_cid_and_video_info(video_id)
+ formats = self._extract_formats(
+ cid, video_id, record_id=record_id, is_live=is_live)
+ info_dict['formats'] = formats
+ return info_dict
+
+
+class QuicklineBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = 'quickline'
+ _HOST = 'mobiltv.quickline.com'
+
+
+class QuicklineIE(QuicklineBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)' % re.escape(QuicklineBaseIE._HOST)
+
+ _TEST = {
+ 'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ channel_name, video_id = re.match(self._VALID_URL, url).groups()
+ return self._extract_video(channel_name, video_id)
+
+
+class QuicklineLiveIE(QuicklineBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<id>[^/]+)' % re.escape(QuicklineBaseIE._HOST)
+
+ _TEST = {
+ 'url': 'https://mobiltv.quickline.com/watch/srf1',
+ 'only_matching': True,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if QuicklineIE.suitable(url) else super(QuicklineLiveIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ channel_name = video_id = self._match_id(url)
+ return self._extract_video(channel_name, video_id, is_live=True)
+
+
+class ZattooBaseIE(ZattooPlatformBaseIE):
+ _NETRC_MACHINE = 'zattoo'
+ _HOST = 'zattoo.com'
+
+
+def _make_valid_url(tmpl, host):
+ return tmpl % re.escape(host)
+
+
+class ZattooIE(ZattooBaseIE):
+ _VALID_URL_TEMPLATE = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
+ _VALID_URL = _make_valid_url(_VALID_URL_TEMPLATE, ZattooBaseIE._HOST)
+
+ # Since regular videos are only available for 7 days and recorded videos
+ # are only available for a specific user, we cannot have detailed tests.
+ _TESTS = [{
+ 'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel_name, video_id, record_id = re.match(self._VALID_URL, url).groups()
+ return self._extract_video(channel_name, video_id, record_id)
+
+
+class ZattooLiveIE(ZattooBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'https://zattoo.com/watch/srf1',
+ 'only_matching': True,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ZattooIE.suitable(url) else super(ZattooLiveIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ channel_name = video_id = self._match_id(url)
+ return self._extract_video(channel_name, video_id, is_live=True)
+
+
+class NetPlusIE(ZattooIE):
+ _NETRC_MACHINE = 'netplus'
+ _HOST = 'netplus.tv'
+ _API_HOST = 'www.%s' % _HOST
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://www.netplus.tv/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
+
+
+class MNetTVIE(ZattooIE):
+ _NETRC_MACHINE = 'mnettv'
+ _HOST = 'tvplus.m-net.de'
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://tvplus.m-net.de/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
+
+
+class WalyTVIE(ZattooIE):
+ _NETRC_MACHINE = 'walytv'
+ _HOST = 'player.waly.tv'
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://player.waly.tv/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
+
+
+class BBVTVIE(ZattooIE):
+ _NETRC_MACHINE = 'bbvtv'
+ _HOST = 'bbv-tv.net'
+ _API_HOST = 'www.%s' % _HOST
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://www.bbv-tv.net/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
+
+
+class VTXTVIE(ZattooIE):
+ _NETRC_MACHINE = 'vtxtv'
+ _HOST = 'vtxtv.ch'
+ _API_HOST = 'www.%s' % _HOST
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://www.vtxtv.ch/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
+
+
+class MyVisionTVIE(ZattooIE):
+ _NETRC_MACHINE = 'myvisiontv'
+ _HOST = 'myvisiontv.ch'
+ _API_HOST = 'www.%s' % _HOST
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://www.myvisiontv.ch/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
+
+
+class GlattvisionTVIE(ZattooIE):
+ _NETRC_MACHINE = 'glattvisiontv'
+ _HOST = 'iptv.glattvision.ch'
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://iptv.glattvision.ch/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
+
+
+class SAKTVIE(ZattooIE):
+ _NETRC_MACHINE = 'saktv'
+ _HOST = 'saktv.ch'
+ _API_HOST = 'www.%s' % _HOST
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://www.saktv.ch/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
+
+
+class EWETVIE(ZattooIE):
+ _NETRC_MACHINE = 'ewetv'
+ _HOST = 'tvonline.ewe.de'
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://tvonline.ewe.de/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
+
+
+class QuantumTVIE(ZattooIE):
+ _NETRC_MACHINE = 'quantumtv'
+ _HOST = 'quantum-tv.com'
+ _API_HOST = 'www.%s' % _HOST
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://www.quantum-tv.com/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
+
+
+class OsnatelTVIE(ZattooIE):
+ _NETRC_MACHINE = 'osnateltv'
+ _HOST = 'tvonline.osnatel.de'
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://tvonline.osnatel.de/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
+
+
+class EinsUndEinsTVIE(ZattooIE):
+ _NETRC_MACHINE = '1und1tv'
+ _HOST = '1und1.tv'
+ _API_HOST = 'www.%s' % _HOST
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://www.1und1.tv/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
+
+
+class SaltTVIE(ZattooIE):
+ _NETRC_MACHINE = 'salttv'
+ _HOST = 'tv.salt.ch'
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://tv.salt.ch/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
diff --git a/hypervideo_dl/extractor/zdf.py b/hypervideo_dl/extractor/zdf.py
new file mode 100644
index 0000000..4dd56f6
--- /dev/null
+++ b/hypervideo_dl/extractor/zdf.py
@@ -0,0 +1,378 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ merge_dicts,
+ NO_DEFAULT,
+ orderedSet,
+ parse_codecs,
+ qualities,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ urljoin,
+)
+
+
+class ZDFBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['DE']
+ _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd')
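+ # Ordered worst to best; qualities() assigns higher preference to later entries.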
+
+ def _call_api(self, url, video_id, item, api_token=None, referrer=None):
+ headers = {}
+ if api_token:
+ headers['Api-Auth'] = 'Bearer %s' % api_token
+ if referrer:
+ headers['Referer'] = referrer
+ return self._download_json(
+ url, video_id, 'Downloading JSON %s' % item, headers=headers)
+
+ @staticmethod
+ def _extract_subtitles(src):
+ subtitles = {}
+ for caption in try_get(src, lambda x: x['captions'], list) or []:
+ subtitle_url = url_or_none(caption.get('uri'))
+ if subtitle_url:
+ lang = caption.get('language', 'deu')
+ subtitles.setdefault(lang, []).append({
+ 'url': subtitle_url,
+ })
+ return subtitles
+
+ def _extract_format(self, video_id, formats, format_urls, meta):
+ format_url = url_or_none(meta.get('url'))
+ if not format_url:
+ return
+ if format_url in format_urls:
+ return
+ format_urls.add(format_url)
+ mime_type = meta.get('mimeType')
+ ext = determine_ext(format_url)
+ if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id='hls',
+ entry_protocol='m3u8_native', fatal=False))
+ elif mime_type == 'application/f4m+xml' or ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
+ else:
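+ # Plain HTTP progressive download; given preference -10 so the adaptive formats above are favored.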
+ f = parse_codecs(meta.get('mimeCodec'))
+ format_id = ['http']
+ for p in (meta.get('type'), meta.get('quality')):
+ if p and isinstance(p, compat_str):
+ format_id.append(p)
+ f.update({
+ 'url': format_url,
+ 'format_id': '-'.join(format_id),
+ 'format_note': meta.get('quality'),
+ 'language': meta.get('language'),
+ 'quality': qualities(self._QUALITIES)(meta.get('quality')),
+ 'preference': -10,
+ })
+ formats.append(f)
+
+ def _extract_ptmd(self, ptmd_url, video_id, api_token, referrer):
+ ptmd = self._call_api(
+ ptmd_url, video_id, 'metadata', api_token, referrer)
+
+ content_id = ptmd.get('basename') or ptmd_url.split('/')[-1]
+
+ formats = []
+ track_uris = set()
+ for p in ptmd['priorityList']:
+ formitaeten = p.get('formitaeten')
+ if not isinstance(formitaeten, list):
+ continue
+ for f in formitaeten:
+ f_qualities = f.get('qualities')
+ if not isinstance(f_qualities, list):
+ continue
+ for quality in f_qualities:
+ tracks = try_get(quality, lambda x: x['audio']['tracks'], list)
+ if not tracks:
+ continue
+ for track in tracks:
+ self._extract_format(
+ content_id, formats, track_uris, {
+ 'url': track.get('uri'),
+ 'type': f.get('type'),
+ 'mimeType': f.get('mimeType'),
+ 'quality': quality.get('quality'),
+ 'language': track.get('language'),
+ })
+ self._sort_formats(formats)
+
+ duration = float_or_none(try_get(
+ ptmd, lambda x: x['attributes']['duration']['value']), scale=1000)
+
+ return {
+ 'extractor_key': ZDFIE.ie_key(),
+ 'id': content_id,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': self._extract_subtitles(ptmd),
+ }
+
+ def _extract_player(self, webpage, video_id, fatal=True):
+ return self._parse_json(
+ self._search_regex(
+ r'(?s)data-zdfplayer-jsb=(["\'])(?P<json>{.+?})\1', webpage,
+ 'player JSON', default='{}' if not fatal else NO_DEFAULT,
+ group='json'),
+ video_id)
+
+
+class ZDFIE(ZDFBaseIE):
+ _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
+ _TESTS = [{
+ # Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
+ 'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
+ 'md5': '34ec321e7eb34231fd88616c65c92db0',
+ 'info_dict': {
+ 'id': '210222_phx_nachgehakt_corona_protest',
+ 'ext': 'mp4',
+ 'title': 'Wohin führt der Protest in der Pandemie?',
+ 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
+ 'duration': 1691,
+ 'timestamp': 1613948400,
+ 'upload_date': '20210221',
+ },
+ }, {
+ # Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
+ 'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
+ 'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
+ 'info_dict': {
+ 'id': '141007_ab18_10wochensommer_film',
+ 'ext': 'mp4',
+ 'title': 'Ab 18! - 10 Wochen Sommer',
+ 'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
+ 'duration': 2660,
+ 'timestamp': 1608604200,
+ 'upload_date': '20201222',
+ },
+ }, {
+ 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
+ 'info_dict': {
+ 'id': '151025_magie_farben2_tex',
+ 'ext': 'mp4',
+ 'title': 'Die Magie der Farben (2/2)',
+ 'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
+ 'duration': 2615,
+ 'timestamp': 1465021200,
+ 'upload_date': '20160604',
+ },
+ }, {
+ # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche
+ 'url': 'https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html',
+ 'only_matching': True,
+ }, {
+ # Same as https://www.3sat.de/film/spielfilm/der-hauptmann-100.html
+ 'url': 'https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html',
+ 'only_matching': True,
+ }, {
+ # Same as https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids
+ 'url': 'https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
+ 'only_matching': True,
+ }]
+
+ def _extract_entry(self, url, player, content, video_id):
+ title = content.get('title') or content['teaserHeadline']
+
+ t = content['mainVideoContent']['http://zdf.de/rels/target']
+
+ ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
+
+ if not ptmd_path:
+ ptmd_path = t[
+ 'http://zdf.de/rels/streams/ptmd-template'].replace(
+ '{playerId}', 'ngplayer_2_4')
+
+ info = self._extract_ptmd(
+ urljoin(url, ptmd_path), video_id, player['apiToken'], url)
+
+ thumbnails = []
+ layouts = try_get(
+ content, lambda x: x['teaserImageRef']['layouts'], dict)
+ if layouts:
+ for layout_key, layout_url in layouts.items():
+ layout_url = url_or_none(layout_url)
+ if not layout_url:
+ continue
+ thumbnail = {
+ 'url': layout_url,
+ 'format_id': layout_key,
+ }
+ mobj = re.search(r'(?P<width>\d+)x(?P<height>\d+)', layout_key)
+ if mobj:
+ thumbnail.update({
+ 'width': int(mobj.group('width')),
+ 'height': int(mobj.group('height')),
+ })
+ thumbnails.append(thumbnail)
+
+ return merge_dicts(info, {
+ 'title': title,
+ 'description': content.get('leadParagraph') or content.get('teasertext'),
+ 'duration': int_or_none(t.get('duration')),
+ 'timestamp': unified_timestamp(content.get('editorialDate')),
+ 'thumbnails': thumbnails,
+ })
+
+ def _extract_regular(self, url, player, video_id):
+ content = self._call_api(
+ player['content'], video_id, 'content', player['apiToken'], url)
+ return self._extract_entry(player['content'], player, content, video_id)
+
+ def _extract_mobile(self, video_id):
+ video = self._download_json(
+ 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
+ video_id)
+
+ document = video['document']
+
+ title = document['titel']
+ content_id = document['basename']
+
+ formats = []
+ format_urls = set()
+ for f in document['formitaeten']:
+ self._extract_format(content_id, formats, format_urls, f)
+ self._sort_formats(formats)
+
+ thumbnails = []
+ teaser_bild = document.get('teaserBild')
+ if isinstance(teaser_bild, dict):
+ for thumbnail_key, thumbnail in teaser_bild.items():
+ thumbnail_url = try_get(
+ thumbnail, lambda x: x['url'], compat_str)
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'id': thumbnail_key,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ return {
+ 'id': content_id,
+ 'title': title,
+ 'description': document.get('beschreibung'),
+ 'duration': int_or_none(document.get('length')),
+ 'timestamp': unified_timestamp(document.get('date')) or unified_timestamp(
+ try_get(video, lambda x: x['meta']['editorialDate'], compat_str)),
+ 'thumbnails': thumbnails,
+ 'subtitles': self._extract_subtitles(document),
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id, fatal=False)
+ if webpage:
+ player = self._extract_player(webpage, url, fatal=False)
+ if player:
+ return self._extract_regular(url, player, video_id)
+
+ return self._extract_mobile(video_id)
+
+
+class ZDFChannelIE(ZDFBaseIE):
+ _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
+ 'info_dict': {
+ 'id': 'das-aktuelle-sportstudio',
+ 'title': 'das aktuelle sportstudio | ZDF',
+ },
+ 'playlist_mincount': 23,
+ }, {
+ 'url': 'https://www.zdf.de/dokumentation/planet-e',
+ 'info_dict': {
+ 'id': 'planet-e',
+ 'title': 'planet e.',
+ },
+ 'playlist_mincount': 50,
+ }, {
+ 'url': 'https://www.zdf.de/filme/taunuskrimi/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, channel_id)
+
+ entries = [
+ self.url_result(item_url, ie=ZDFIE.ie_key())
+ for item_url in orderedSet(re.findall(
+ r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
+
+ return self.playlist_result(
+ entries, channel_id, self._og_search_title(webpage, fatal=False))
+
+ r"""
+ player = self._extract_player(webpage, channel_id)
+
+ channel_id = self._search_regex(
+ r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
+ 'channel id', group='id')
+
+ channel = self._call_api(
+ 'https://api.zdf.de/content/documents/%s.json' % channel_id,
+ player, url, channel_id)
+
+ items = []
+ for module in channel['module']:
+ for teaser in try_get(module, lambda x: x['teaser'], list) or []:
+ t = try_get(
+ teaser, lambda x: x['http://zdf.de/rels/target'], dict)
+ if not t:
+ continue
+ items.extend(try_get(
+ t,
+ lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
+ list) or [])
+ items.extend(try_get(
+ module,
+ lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
+ list) or [])
+
+ entries = []
+ entry_urls = set()
+ for item in items:
+ t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
+ if not t:
+ continue
+ sharing_url = t.get('http://zdf.de/rels/sharing-url')
+ if not sharing_url or not isinstance(sharing_url, compat_str):
+ continue
+ if sharing_url in entry_urls:
+ continue
+ entry_urls.add(sharing_url)
+ entries.append(self.url_result(
+ sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
+
+ return self.playlist_result(entries, channel_id, channel.get('title'))
+ """
diff --git a/hypervideo_dl/extractor/zhihu.py b/hypervideo_dl/extractor/zhihu.py
new file mode 100644
index 0000000..d1ed55b
--- /dev/null
+++ b/hypervideo_dl/extractor/zhihu.py
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import float_or_none, int_or_none
+
+
+class ZhihuIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?zhihu\.com/zvideo/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://www.zhihu.com/zvideo/1342930761977176064',
+ 'md5': 'c8d4c9cd72dd58e6f9bc9c2c84266464',
+ 'info_dict': {
+ 'id': '1342930761977176064',
+ 'ext': 'mp4',
+ 'title': '写春联也太难了吧!',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'uploader': '桥半舫',
+ 'timestamp': 1612959715,
+ 'upload_date': '20210210',
+ 'uploader_id': '244ecb13b0fd7daf92235288c8ca3365',
+ 'duration': 146.333,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ zvideo = self._download_json(
+ 'https://www.zhihu.com/api/v4/zvideos/' + video_id, video_id)
+ title = zvideo['title']
+ video = zvideo.get('video') or {}
+
+ formats = []
+ for format_id, q in (video.get('playlist') or {}).items():
+ play_url = q.get('url') or q.get('play_url')
+ if not play_url:
+ continue
+ formats.append({
+ 'asr': int_or_none(q.get('sample_rate')),
+ 'filesize': int_or_none(q.get('size')),
+ 'format_id': format_id,
+ 'fps': int_or_none(q.get('fps')),
+ 'height': int_or_none(q.get('height')),
+ 'tbr': float_or_none(q.get('bitrate')),
+ 'url': play_url,
+ 'width': int_or_none(q.get('width')),
+ })
+ self._sort_formats(formats)
+
+ author = zvideo.get('author') or {}
+ url_token = author.get('url_token')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': video.get('thumbnail') or zvideo.get('image_url'),
+ 'uploader': author.get('name'),
+ 'timestamp': int_or_none(zvideo.get('published_at')),
+ 'uploader_id': author.get('id'),
+ 'uploader_url': 'https://www.zhihu.com/people/' + url_token if url_token else None,
+ 'duration': float_or_none(video.get('duration')),
+ 'view_count': int_or_none(zvideo.get('play_count')),
+ 'like_count': int_or_none(zvideo.get('liked_count')),
+ 'comment_count': int_or_none(zvideo.get('comment_count')),
+ }
diff --git a/hypervideo_dl/extractor/zingmp3.py b/hypervideo_dl/extractor/zingmp3.py
new file mode 100644
index 0000000..207c04f
--- /dev/null
+++ b/hypervideo_dl/extractor/zingmp3.py
@@ -0,0 +1,161 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class ZingMp3BaseIE(InfoExtractor):
+ _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?:%s)/[^/]+/(?P<id>\w+)\.html'
+ _GEO_COUNTRIES = ['VN']
+
+ def _extract_item(self, item, fatal):
+ item_id = item['id']
+ title = item.get('name') or item['title']
+
+ formats = []
+ for k, v in (item.get('source') or {}).items():
+ if not v:
+ continue
+ if k in ('mp4', 'hls'):
+ for res, video_url in v.items():
+ if not video_url:
+ continue
+ if k == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, item_id, 'mp4',
+ 'm3u8_native', m3u8_id=k, fatal=False))
+ elif k == 'mp4':
+ formats.append({
+ 'format_id': 'mp4-' + res,
+ 'url': video_url,
+ 'height': int_or_none(self._search_regex(
+ r'^(\d+)p', res, 'resolution', default=None)),
+ })
+ else:
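+ # Remaining source keys appear to be audio bitrates (e.g. '128', '320') pointing at MP3 URLs.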
+ formats.append({
+ 'ext': 'mp3',
+ 'format_id': k,
+ 'tbr': int_or_none(k),
+ 'url': self._proto_relative_url(v),
+ 'vcodec': 'none',
+ })
+ if not formats:
+ if not fatal:
+ return
+ msg = item['msg']
+ if msg == 'Sorry, this content is not available in your country.':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ raise ExtractorError(msg, expected=True)
+ self._sort_formats(formats)
+
+ subtitles = None
+ lyric = item.get('lyric')
+ if lyric:
+ subtitles = {
+ 'origin': [{
+ 'url': lyric,
+ }],
+ }
+
+ album = item.get('album') or {}
+
+ return {
+ 'id': item_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': item.get('thumbnail'),
+ 'subtitles': subtitles,
+ 'duration': int_or_none(item.get('duration')),
+ 'track': title,
+ 'artist': item.get('artists_names'),
+ 'album': album.get('name') or album.get('title'),
+ 'album_artist': album.get('artists_names'),
+ }
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url.replace('://zingmp3.vn/', '://mp3.zing.vn/'),
+ page_id, query={'play_song': 1})
+ data_path = self._search_regex(
+ r'data-xml="([^"]+)', webpage, 'data path')
+ return self._process_data(self._download_json(
+ 'https://mp3.zing.vn/xhr' + data_path, page_id)['data'])
+
+
+class ZingMp3IE(ZingMp3BaseIE):
+ _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip'
+ _TESTS = [{
+ 'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
+ 'md5': 'ead7ae13693b3205cbc89536a077daed',
+ 'info_dict': {
+ 'id': 'ZWZB9WAB',
+ 'title': 'Xa Mãi Xa',
+ 'ext': 'mp3',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'subtitles': {
+ 'origin': [{
+ 'ext': 'lrc',
+ }]
+ },
+ 'duration': 255,
+ 'track': 'Xa Mãi Xa',
+ 'artist': 'Bảo Thy',
+ 'album': 'Special Album',
+ 'album_artist': 'Bảo Thy',
+ },
+ }, {
+ 'url': 'https://mp3.zing.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html',
+ 'md5': 'e9c972b693aa88301ef981c8151c4343',
+ 'info_dict': {
+ 'id': 'ZO8ZF7C7',
+ 'title': 'Sương Hoa Đưa Lối',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'duration': 207,
+ 'track': 'Sương Hoa Đưa Lối',
+ 'artist': 'K-ICM, RYO',
+ },
+ }, {
+ 'url': 'https://zingmp3.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
+ 'only_matching': True,
+ }]
+ IE_NAME = 'zingmp3'
+ IE_DESC = 'mp3.zing.vn'
+
+ def _process_data(self, data):
+ return self._extract_item(data, True)
+
+
+class ZingMp3AlbumIE(ZingMp3BaseIE):
+ _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'album|playlist'
+ _TESTS = [{
+ 'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'ZWZBWDAF',
+ 'title': 'Lâu Đài Tình Ái',
+ },
+ 'playlist_count': 10,
+ }, {
+ 'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zingmp3.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
+ 'only_matching': True,
+ }]
+ IE_NAME = 'zingmp3:album'
+
+ def _process_data(self, data):
+ def entries():
+ for item in (data.get('items') or []):
+ entry = self._extract_item(item, False)
+ if entry:
+ yield entry
+ info = data.get('info') or {}
+ return self.playlist_result(
+ entries(), info.get('id'), info.get('name') or info.get('title'))
diff --git a/hypervideo_dl/extractor/zoom.py b/hypervideo_dl/extractor/zoom.py
new file mode 100644
index 0000000..db073d9
--- /dev/null
+++ b/hypervideo_dl/extractor/zoom.py
@@ -0,0 +1,68 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ parse_filesize,
+ urlencode_postdata,
+)
+
+
+class ZoomIE(InfoExtractor):
+ IE_NAME = 'zoom'
+ _VALID_URL = r'(?P<base_url>https?://(?:[^.]+\.)?zoom\.us/)rec(?:ording)?/(?:play|share)/(?P<id>[A-Za-z0-9_.-]+)'
+ _TEST = {
+ 'url': 'https://economist.zoom.us/rec/play/dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
+ 'md5': 'ab445e8c911fddc4f9adc842c2c5d434',
+ 'info_dict': {
+ 'id': 'dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
+ 'ext': 'mp4',
+ 'title': 'China\'s "two sessions" and the new five-year plan',
+ }
+ }
+
+ def _real_extract(self, url):
+ base_url, play_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, play_id)
+
+ try:
+ form = self._form_hidden_inputs('password_form', webpage)
+ except ExtractorError:
+ form = None
+ if form:
+ password = self._downloader.params.get('videopassword')
+ if not password:
+ raise ExtractorError(
+ 'This video is protected by a passcode, use the --video-password option', expected=True)
+ is_meeting = form.get('useWhichPasswd') == 'meeting'
+ validation = self._download_json(
+ base_url + 'rec/validate%s_passwd' % ('_meet' if is_meeting else ''),
+ play_id, 'Validating passcode', 'Wrong passcode', data=urlencode_postdata({
+ 'id': form[('meet' if is_meeting else 'file') + 'Id'],
+ 'passwd': password,
+ 'action': form.get('action'),
+ }))
+ if not validation.get('status'):
+ raise ExtractorError(validation['errorMessage'], expected=True)
+ webpage = self._download_webpage(url, play_id)
+
+ data = self._parse_json(self._search_regex(
+ r'(?s)window\.__data__\s*=\s*({.+?});',
+ webpage, 'data'), play_id, js_to_json)
+
+ return {
+ 'id': play_id,
+ 'title': data['topic'],
+ 'url': data['viewMp4Url'],
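+ # 'viewResolvtions*' (sic) mirrors the misspelled key names in Zoom's own page data.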
+ 'width': int_or_none(data.get('viewResolvtionsWidth')),
+ 'height': int_or_none(data.get('viewResolvtionsHeight')),
+ 'http_headers': {
+ 'Referer': base_url,
+ },
+ 'filesize_approx': parse_filesize(data.get('fileSize')),
+ }
diff --git a/hypervideo_dl/extractor/zype.py b/hypervideo_dl/extractor/zype.py
new file mode 100644
index 0000000..f20f953
--- /dev/null
+++ b/hypervideo_dl/extractor/zype.py
@@ -0,0 +1,145 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ parse_iso8601,
+)
+
+
+class ZypeIE(InfoExtractor):
+ _ID_RE = r'[\da-fA-F]+'
+ _COMMON_RE = r'//player\.zype\.com/embed/%s\.(?:js|json|html)\?.*?(?:access_token|(?:ap[ip]|player)_key)='
+ _VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % ('(?P<id>%s)' % _ID_RE))
+ _TEST = {
+ 'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
+ 'md5': 'eaee31d474c76a955bdaba02a505c595',
+ 'info_dict': {
+ 'id': '5b400b834b32992a310622b9',
+ 'ext': 'mp4',
+ 'title': 'Smoky Barbecue Favorites',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
+ 'timestamp': 1504915200,
+ 'upload_date': '20170909',
+ },
+ }
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.+?)\1' % (ZypeIE._COMMON_RE % ZypeIE._ID_RE),
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ try:
+ response = self._download_json(re.sub(
+ r'\.(?:js|html)\?', '.json?', url), video_id)['response']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 403):
+ raise ExtractorError(self._parse_json(
+ e.cause.read().decode(), video_id)['message'], expected=True)
+ raise
+
+ body = response['body']
+ video = response['video']
+ title = video['title']
+
+ if isinstance(body, dict):
+ formats = []
+ for output in body.get('outputs', []):
+ output_url = output.get('url')
+ if not output_url:
+ continue
+ name = output.get('name')
+ if name == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ output_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ else:
+ f = {
+ 'format_id': name,
+ 'tbr': int_or_none(output.get('bitrate')),
+ 'url': output_url,
+ }
+ if name in ('m4a', 'mp3'):
+ f['vcodec'] = 'none'
+ else:
+ f.update({
+ 'height': int_or_none(output.get('height')),
+ 'width': int_or_none(output.get('width')),
+ })
+ formats.append(f)
+ text_tracks = body.get('subtitles') or []
+ else:
+ m3u8_url = self._search_regex(
+ r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
+ body, 'm3u8 url', group='url', default=None)
+ if not m3u8_url:
+ source = self._search_regex(
+ r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body, 'source')
+
+ def get_attr(key):
+ return self._search_regex(
+ r'\b%s\s*:\s*([\'"])(?P<val>(?:(?!\1).)+)\1' % key,
+ source, key, group='val')
+
+ if get_attr('integration') == 'verizon-media':
+ m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id')
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+ text_tracks = self._search_regex(
+ r'textTracks\s*:\s*(\[[^]]+\])',
+ body, 'text tracks', default=None)
+ if text_tracks:
+ text_tracks = self._parse_json(
+ text_tracks, video_id, js_to_json, False)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ if text_tracks:
+ for text_track in text_tracks:
+ tt_url = dict_get(text_track, ('file', 'src'))
+ if not tt_url:
+ continue
+ subtitles.setdefault(text_track.get('label') or 'English', []).append({
+ 'url': tt_url,
+ })
+
+ thumbnails = []
+ for thumbnail in video.get('thumbnails', []):
+ thumbnail_url = thumbnail.get('url')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': video.get('friendly_title'),
+ 'title': title,
+ 'thumbnails': thumbnails,
+ 'description': dict_get(video, ('description', 'ott_description', 'short_description')),
+ 'timestamp': parse_iso8601(video.get('published_at')),
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('request_count')),
+ 'average_rating': int_or_none(video.get('rating')),
+ 'season_number': int_or_none(video.get('season')),
+ 'episode_number': int_or_none(video.get('episode')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/jsinterp.py b/hypervideo_dl/jsinterp.py
new file mode 100644
index 0000000..7bda596
--- /dev/null
+++ b/hypervideo_dl/jsinterp.py
@@ -0,0 +1,262 @@
+from __future__ import unicode_literals
+
+import json
+import operator
+import re
+
+from .utils import (
+ ExtractorError,
+ remove_quotes,
+)
+
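+# Ordered from loosest to tightest binding; expressions are split on the
+# first operator that matches, which approximates JavaScript precedence.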
+_OPERATORS = [
+ ('|', operator.or_),
+ ('^', operator.xor),
+ ('&', operator.and_),
+ ('>>', operator.rshift),
+ ('<<', operator.lshift),
+ ('-', operator.sub),
+ ('+', operator.add),
+ ('%', operator.mod),
+ ('/', operator.truediv),
+ ('*', operator.mul),
+]
+_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
+_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
+
+_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
+
+
+class JSInterpreter(object):
+ def __init__(self, code, objects=None):
+ if objects is None:
+ objects = {}
+ self.code = code
+ self._functions = {}
+ self._objects = objects
+
+ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
+ if allow_recursion < 0:
+ raise ExtractorError('Recursion limit reached')
+
+ should_abort = False
+ stmt = stmt.lstrip()
+ stmt_m = re.match(r'var\s', stmt)
+ if stmt_m:
+ expr = stmt[len(stmt_m.group(0)):]
+ else:
+ return_m = re.match(r'return(?:\s+|$)', stmt)
+ if return_m:
+ expr = stmt[len(return_m.group(0)):]
+ should_abort = True
+ else:
+ # Try interpreting it as an expression
+ expr = stmt
+
+ v = self.interpret_expression(expr, local_vars, allow_recursion)
+ return v, should_abort
+
+ def interpret_expression(self, expr, local_vars, allow_recursion):
+ expr = expr.strip()
+ if expr == '': # Empty expression
+ return None
+
+ if expr.startswith('('):
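+ # Locate the matching closing paren, evaluate the parenthesised
+ # sub-expression, and splice its JSON-encoded value back into
+ # whatever follows.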
+ parens_count = 0
+ for m in re.finditer(r'[()]', expr):
+ if m.group(0) == '(':
+ parens_count += 1
+ else:
+ parens_count -= 1
+ if parens_count == 0:
+ sub_expr = expr[1:m.start()]
+ sub_result = self.interpret_expression(
+ sub_expr, local_vars, allow_recursion)
+ remaining_expr = expr[m.end():].strip()
+ if not remaining_expr:
+ return sub_result
+ else:
+ expr = json.dumps(sub_result) + remaining_expr
+ break
+ else:
+ raise ExtractorError('Premature end of parens in %r' % expr)
+
+ for op, opfunc in _ASSIGN_OPERATORS:
+ m = re.match(r'''(?x)
+ (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
+ \s*%s
+ (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
+ if not m:
+ continue
+ right_val = self.interpret_expression(
+ m.group('expr'), local_vars, allow_recursion - 1)
+
+ if m.groupdict().get('index'):
+ lvar = local_vars[m.group('out')]
+ idx = self.interpret_expression(
+ m.group('index'), local_vars, allow_recursion)
+ assert isinstance(idx, int)
+ cur = lvar[idx]
+ val = opfunc(cur, right_val)
+ lvar[idx] = val
+ return val
+ else:
+ cur = local_vars.get(m.group('out'))
+ val = opfunc(cur, right_val)
+ local_vars[m.group('out')] = val
+ return val
+
+ if expr.isdigit():
+ return int(expr)
+
+ var_m = re.match(
+ r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE,
+ expr)
+ if var_m:
+ return local_vars[var_m.group('name')]
+
+ try:
+ return json.loads(expr)
+ except ValueError:
+ pass
+
+ m = re.match(
+ r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
+ if m:
+ val = local_vars[m.group('in')]
+ idx = self.interpret_expression(
+ m.group('idx'), local_vars, allow_recursion - 1)
+ return val[idx]
+
+ m = re.match(
+ r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
+ expr)
+ if m:
+ variable = m.group('var')
+ member = remove_quotes(m.group('member') or m.group('member2'))
+ arg_str = m.group('args')
+
+ if variable in local_vars:
+ obj = local_vars[variable]
+ else:
+ if variable not in self._objects:
+ self._objects[variable] = self.extract_object(variable)
+ obj = self._objects[variable]
+
+ if arg_str is None:
+ # Member access
+ if member == 'length':
+ return len(obj)
+ return obj[member]
+
+ assert expr.endswith(')')
+ # Function call
+ if arg_str == '':
+ argvals = tuple()
+ else:
+ argvals = tuple([
+ self.interpret_expression(v, local_vars, allow_recursion)
+ for v in arg_str.split(',')])
+
+ if member == 'split':
+ assert argvals == ('',)
+ return list(obj)
+ if member == 'join':
+ assert len(argvals) == 1
+ return argvals[0].join(obj)
+ if member == 'reverse':
+ assert len(argvals) == 0
+ obj.reverse()
+ return obj
+ if member == 'slice':
+ assert len(argvals) == 1
+ return obj[argvals[0]:]
+ if member == 'splice':
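+                # Emulates Array.prototype.splice(index, howMany), e.g.
+                # [0,1,2,3].splice(1, 2) removes and returns [1, 2]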
+ assert isinstance(obj, list)
+ index, howMany = argvals
+ res = []
+ for i in range(index, min(index + howMany, len(obj))):
+ res.append(obj.pop(index))
+ return res
+
+ return obj[member](argvals)
+
+ for op, opfunc in _OPERATORS:
+ m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
+ if not m:
+ continue
+ x, abort = self.interpret_statement(
+ m.group('x'), local_vars, allow_recursion - 1)
+ if abort:
+ raise ExtractorError(
+ 'Premature left-side return of %s in %r' % (op, expr))
+ y, abort = self.interpret_statement(
+ m.group('y'), local_vars, allow_recursion - 1)
+ if abort:
+ raise ExtractorError(
+ 'Premature right-side return of %s in %r' % (op, expr))
+ return opfunc(x, y)
+
+ m = re.match(
+ r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
+ if m:
+ fname = m.group('func')
+ argvals = tuple([
+ int(v) if v.isdigit() else local_vars[v]
+ for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple()
+ if fname not in self._functions:
+ self._functions[fname] = self.extract_function(fname)
+ return self._functions[fname](argvals)
+
+ raise ExtractorError('Unsupported JS expression %r' % expr)
+
+ def extract_object(self, objname):
+ _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
+ obj = {}
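+        # Matches object literals of the form  name = {key: function(a){...}, ...};
+        # e.g. (hypothetical): var xy = {r: function(a){a.reverse()}};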
+ obj_m = re.search(
+ r'''(?x)
+ (?<!this\.)%s\s*=\s*{\s*
+ (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
+ }\s*;
+ ''' % (re.escape(objname), _FUNC_NAME_RE),
+ self.code)
+ fields = obj_m.group('fields')
+ # Currently, it only supports function definitions
+ fields_m = re.finditer(
+ r'''(?x)
+ (?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}
+ ''' % _FUNC_NAME_RE,
+ fields)
+ for f in fields_m:
+ argnames = f.group('args').split(',')
+ obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code'))
+
+ return obj
+
+ def extract_function(self, funcname):
+ func_m = re.search(
+ r'''(?x)
+ (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
+ \((?P<args>[^)]*)\)\s*
+ \{(?P<code>[^}]+)\}''' % (
+ re.escape(funcname), re.escape(funcname), re.escape(funcname)),
+ self.code)
+ if func_m is None:
+ raise ExtractorError('Could not find JS function %r' % funcname)
+ argnames = func_m.group('args').split(',')
+
+ return self.build_function(argnames, func_m.group('code'))
+
+ def call_function(self, funcname, *args):
+ f = self.extract_function(funcname)
+ return f(args)
+
+ def build_function(self, argnames, code):
+ def resf(args):
+ local_vars = dict(zip(argnames, args))
+ for stmt in code.split(';'):
+ res, abort = self.interpret_statement(stmt, local_vars)
+ if abort:
+ break
+ return res
+ return resf
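+
+
+# A minimal usage sketch, relying only on the behaviour implemented above:
+#
+#   interp = JSInterpreter('function f(a,b){return a+b;}')
+#   interp.call_function('f', 1, 2)        # -> 3
+#   interp.extract_function('f')((3, 4))   # built functions take an args tuple -> 7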
diff --git a/hypervideo_dl/options.py b/hypervideo_dl/options.py
new file mode 100644
index 0000000..6ec5912
--- /dev/null
+++ b/hypervideo_dl/options.py
@@ -0,0 +1,916 @@
+from __future__ import unicode_literals
+
+import os.path
+import optparse
+import re
+import sys
+
+from .downloader.external import list_external_downloaders
+from .compat import (
+ compat_expanduser,
+ compat_get_terminal_size,
+ compat_getenv,
+ compat_kwargs,
+ compat_shlex_split,
+)
+from .utils import (
+ preferredencoding,
+ write_string,
+)
+from .version import __version__
+
+
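+# Scrubbing sketch (hypothetical argv):
+#   _hide_login_info(['-u', 'me', '--password=secret'])
+#   -> ['-u', 'PRIVATE', '--password=PRIVATE']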
+def _hide_login_info(opts):
+ PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
+ eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
+
+ def _scrub_eq(o):
+ m = eqre.match(o)
+ if m:
+ return m.group('key') + '=PRIVATE'
+ else:
+ return o
+
+ opts = list(map(_scrub_eq, opts))
+ for idx, opt in enumerate(opts):
+ if opt in PRIVATE_OPTS and idx + 1 < len(opts):
+ opts[idx + 1] = 'PRIVATE'
+ return opts
+
+
+def parseOpts(overrideArguments=None):
+ def _readOptions(filename_bytes, default=[]):
+ try:
+ optionf = open(filename_bytes)
+ except IOError:
+ return default # silently skip if file is not present
+ try:
+ # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
+ contents = optionf.read()
+ if sys.version_info < (3,):
+ contents = contents.decode(preferredencoding())
+ res = compat_shlex_split(contents, comments=True)
+ finally:
+ optionf.close()
+ return res
+
+ def _readUserConf():
+ xdg_config_home = compat_getenv('XDG_CONFIG_HOME')
+ if xdg_config_home:
+ userConfFile = os.path.join(xdg_config_home, 'hypervideo', 'config')
+ if not os.path.isfile(userConfFile):
+ userConfFile = os.path.join(xdg_config_home, 'hypervideo.conf')
+ else:
+ userConfFile = os.path.join(compat_expanduser('~'), '.config', 'hypervideo', 'config')
+ if not os.path.isfile(userConfFile):
+ userConfFile = os.path.join(compat_expanduser('~'), '.config', 'hypervideo.conf')
+ userConf = _readOptions(userConfFile, None)
+
+ if userConf is None:
+ appdata_dir = compat_getenv('appdata')
+ if appdata_dir:
+ userConf = _readOptions(
+ os.path.join(appdata_dir, 'hypervideo', 'config'),
+ default=None)
+ if userConf is None:
+ userConf = _readOptions(
+ os.path.join(appdata_dir, 'hypervideo', 'config.txt'),
+ default=None)
+
+ if userConf is None:
+ userConf = _readOptions(
+ os.path.join(compat_expanduser('~'), 'hypervideo.conf'),
+ default=None)
+ if userConf is None:
+ userConf = _readOptions(
+ os.path.join(compat_expanduser('~'), 'hypervideo.conf.txt'),
+ default=None)
+
+ if userConf is None:
+ userConf = []
+
+ return userConf
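+
+    # Lookup order sketch: $XDG_CONFIG_HOME (or ~/.config)/hypervideo/config,
+    # then hypervideo.conf in that tree, then %appdata%/hypervideo/config(.txt),
+    # then ~/hypervideo.conf(.txt); the first file that can be read wins.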
+
+ def _format_option_string(option):
+        ''' ('-o', '--option') -> -o, --option METAVAR'''
+
+ opts = []
+
+ if option._short_opts:
+ opts.append(option._short_opts[0])
+ if option._long_opts:
+ opts.append(option._long_opts[0])
+ if len(opts) > 1:
+ opts.insert(1, ', ')
+
+ if option.takes_value():
+ opts.append(' %s' % option.metavar)
+
+ return ''.join(opts)
+
+ def _comma_separated_values_options_callback(option, opt_str, value, parser):
+ setattr(parser.values, option.dest, value.split(','))
+
+ # No need to wrap help messages if we're on a wide console
+ columns = compat_get_terminal_size().columns
+ max_width = columns if columns else 80
+ max_help_position = 80
+
+ fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
+ fmt.format_option_strings = _format_option_string
+
+ kw = {
+ 'version': __version__,
+ 'formatter': fmt,
+ 'usage': '%prog [OPTIONS] URL [URL...]',
+ 'conflict_handler': 'resolve',
+ }
+
+ parser = optparse.OptionParser(**compat_kwargs(kw))
+
+ general = optparse.OptionGroup(parser, 'General Options')
+ general.add_option(
+ '-h', '--help',
+ action='help',
+ help='Print this help text and exit')
+ general.add_option(
+ '--version',
+ action='version',
+ help='Print program version and exit')
+ general.add_option(
+ '-i', '--ignore-errors',
+ action='store_true', dest='ignoreerrors', default=False,
+ help='Continue on download errors, for example to skip unavailable videos in a playlist')
+ general.add_option(
+ '--abort-on-error',
+ action='store_false', dest='ignoreerrors',
+ help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
+ general.add_option(
+ '--dump-user-agent',
+ action='store_true', dest='dump_user_agent', default=False,
+ help='Display the current browser identification')
+ general.add_option(
+ '--list-extractors',
+ action='store_true', dest='list_extractors', default=False,
+ help='List all supported extractors')
+ general.add_option(
+ '--extractor-descriptions',
+ action='store_true', dest='list_extractor_descriptions', default=False,
+ help='Output descriptions of all supported extractors')
+ general.add_option(
+ '--force-generic-extractor',
+ action='store_true', dest='force_generic_extractor', default=False,
+ help='Force extraction to use the generic extractor')
+ general.add_option(
+ '--default-search',
+ dest='default_search', metavar='PREFIX',
+        help='Use this prefix for unqualified URLs. For example, with "gvsearch2:" the command hypervideo "large apple" downloads two videos from google videos matching "large apple". Use the value "auto" to let hypervideo guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
+ general.add_option(
+ '--ignore-config',
+ action='store_true',
+ help='Do not read configuration files. '
+ 'When given in the global configuration file /etc/hypervideo.conf: '
+ 'Do not read the user configuration in ~/.config/hypervideo/config '
+ '(%APPDATA%/hypervideo/config.txt on Windows)')
+ general.add_option(
+ '--config-location',
+ dest='config_location', metavar='PATH',
+ help='Location of the configuration file; either the path to the config or its containing directory.')
+ general.add_option(
+ '--flat-playlist',
+ action='store_const', dest='extract_flat', const='in_playlist',
+ default=False,
+ help='Do not extract the videos of a playlist, only list them.')
+ general.add_option(
+ '--mark-watched',
+ action='store_true', dest='mark_watched', default=False,
+ help='Mark videos watched (YouTube only)')
+ general.add_option(
+ '--no-mark-watched',
+ action='store_false', dest='mark_watched', default=False,
+ help='Do not mark videos watched (YouTube only)')
+ general.add_option(
+ '--no-color', '--no-colors',
+ action='store_true', dest='no_color',
+ default=False,
+ help='Do not emit color codes in output')
+
+ network = optparse.OptionGroup(parser, 'Network Options')
+ network.add_option(
+ '--proxy', dest='proxy',
+ default=None, metavar='URL',
+ help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable '
+ 'SOCKS proxy, specify a proper scheme. For example '
+ 'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") '
+ 'for direct connection')
+ network.add_option(
+ '--socket-timeout',
+ dest='socket_timeout', type=float, default=None, metavar='SECONDS',
+ help='Time to wait before giving up, in seconds')
+ network.add_option(
+ '--source-address',
+ metavar='IP', dest='source_address', default=None,
+ help='Client-side IP address to bind to',
+ )
+ network.add_option(
+ '-4', '--force-ipv4',
+ action='store_const', const='0.0.0.0', dest='source_address',
+ help='Make all connections via IPv4',
+ )
+ network.add_option(
+ '-6', '--force-ipv6',
+ action='store_const', const='::', dest='source_address',
+ help='Make all connections via IPv6',
+ )
+
+ geo = optparse.OptionGroup(parser, 'Geo Restriction')
+ geo.add_option(
+ '--geo-verification-proxy',
+ dest='geo_verification_proxy', default=None, metavar='URL',
+ help='Use this proxy to verify the IP address for some geo-restricted sites. '
+ 'The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading.')
+ geo.add_option(
+ '--cn-verification-proxy',
+ dest='cn_verification_proxy', default=None, metavar='URL',
+ help=optparse.SUPPRESS_HELP)
+ geo.add_option(
+ '--geo-bypass',
+ action='store_true', dest='geo_bypass', default=True,
+ help='Bypass geographic restriction via faking X-Forwarded-For HTTP header')
+ geo.add_option(
+ '--no-geo-bypass',
+ action='store_false', dest='geo_bypass', default=True,
+ help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header')
+ geo.add_option(
+ '--geo-bypass-country', metavar='CODE',
+ dest='geo_bypass_country', default=None,
+ help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code')
+ geo.add_option(
+ '--geo-bypass-ip-block', metavar='IP_BLOCK',
+ dest='geo_bypass_ip_block', default=None,
+ help='Force bypass geographic restriction with explicitly provided IP block in CIDR notation')
+
+ selection = optparse.OptionGroup(parser, 'Video Selection')
+ selection.add_option(
+ '--playlist-start',
+ dest='playliststart', metavar='NUMBER', default=1, type=int,
+ help='Playlist video to start at (default is %default)')
+ selection.add_option(
+ '--playlist-end',
+ dest='playlistend', metavar='NUMBER', default=None, type=int,
+ help='Playlist video to end at (default is last)')
+ selection.add_option(
+ '--playlist-items',
+ dest='playlist_items', metavar='ITEM_SPEC', default=None,
+        help='Playlist video items to download. Specify the indices of the videos in the playlist, separated by commas, e.g. "--playlist-items 1,2,5,8" to download the videos indexed 1, 2, 5 and 8 in the playlist. Ranges are also supported: "--playlist-items 1-3,7,10-13" downloads the videos at indices 1, 2, 3, 7, 10, 11, 12 and 13.')
+ selection.add_option(
+ '--match-title',
+ dest='matchtitle', metavar='REGEX',
+ help='Download only matching titles (regex or caseless sub-string)')
+ selection.add_option(
+ '--reject-title',
+ dest='rejecttitle', metavar='REGEX',
+ help='Skip download for matching titles (regex or caseless sub-string)')
+ selection.add_option(
+ '--max-downloads',
+ dest='max_downloads', metavar='NUMBER', type=int, default=None,
+ help='Abort after downloading NUMBER files')
+ selection.add_option(
+ '--min-filesize',
+ metavar='SIZE', dest='min_filesize', default=None,
+ help='Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)')
+ selection.add_option(
+ '--max-filesize',
+ metavar='SIZE', dest='max_filesize', default=None,
+ help='Do not download any videos larger than SIZE (e.g. 50k or 44.6m)')
+ selection.add_option(
+ '--date',
+ metavar='DATE', dest='date', default=None,
+ help='Download only videos uploaded in this date')
+ selection.add_option(
+ '--datebefore',
+ metavar='DATE', dest='datebefore', default=None,
+ help='Download only videos uploaded on or before this date (i.e. inclusive)')
+ selection.add_option(
+ '--dateafter',
+ metavar='DATE', dest='dateafter', default=None,
+ help='Download only videos uploaded on or after this date (i.e. inclusive)')
+ selection.add_option(
+ '--min-views',
+ metavar='COUNT', dest='min_views', default=None, type=int,
+        help='Do not download any videos with fewer than COUNT views'
+ selection.add_option(
+ '--max-views',
+ metavar='COUNT', dest='max_views', default=None, type=int,
+ help='Do not download any videos with more than COUNT views')
+ selection.add_option(
+ '--match-filter',
+ metavar='FILTER', dest='match_filter', default=None,
+ help=(
+ 'Generic video filter. '
+ 'Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys) to '
+ 'match if the key is present, '
+ '!key to check if the key is not present, '
+ 'key > NUMBER (like "comment_count > 12", also works with '
+ '>=, <, <=, !=, =) to compare against a number, '
+ 'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
+ 'to match against a string literal '
+ 'and & to require multiple matches. '
+ 'Values which are not known are excluded unless you '
+ 'put a question mark (?) after the operator. '
+ 'For example, to only match videos that have been liked more than '
+ '100 times and disliked less than 50 times (or the dislike '
+ 'functionality is not available at the given service), but who '
+ 'also have a description, use --match-filter '
+ '"like_count > 100 & dislike_count <? 50 & description" .'
+ ))
+ selection.add_option(
+ '--no-playlist',
+ action='store_true', dest='noplaylist', default=False,
+ help='Download only the video, if the URL refers to a video and a playlist.')
+ selection.add_option(
+ '--yes-playlist',
+ action='store_false', dest='noplaylist', default=False,
+ help='Download the playlist, if the URL refers to a video and a playlist.')
+ selection.add_option(
+ '--age-limit',
+ metavar='YEARS', dest='age_limit', default=None, type=int,
+ help='Download only videos suitable for the given age')
+ selection.add_option(
+ '--download-archive', metavar='FILE',
+ dest='download_archive',
+ help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
+ selection.add_option(
+ '--include-ads',
+ dest='include_ads', action='store_true',
+ help='Download advertisements as well (experimental)')
+
+ authentication = optparse.OptionGroup(parser, 'Authentication Options')
+ authentication.add_option(
+ '-u', '--username',
+ dest='username', metavar='USERNAME',
+ help='Login with this account ID')
+ authentication.add_option(
+ '-p', '--password',
+ dest='password', metavar='PASSWORD',
+ help='Account password. If this option is left out, hypervideo will ask interactively.')
+ authentication.add_option(
+ '-2', '--twofactor',
+ dest='twofactor', metavar='TWOFACTOR',
+ help='Two-factor authentication code')
+ authentication.add_option(
+ '-n', '--netrc',
+ action='store_true', dest='usenetrc', default=False,
+ help='Use .netrc authentication data')
+ authentication.add_option(
+ '--video-password',
+ dest='videopassword', metavar='PASSWORD',
+ help='Video password (vimeo, youku)')
+
+ adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options')
+ adobe_pass.add_option(
+ '--ap-mso',
+ dest='ap_mso', metavar='MSO',
+ help='Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs')
+ adobe_pass.add_option(
+ '--ap-username',
+ dest='ap_username', metavar='USERNAME',
+ help='Multiple-system operator account login')
+ adobe_pass.add_option(
+ '--ap-password',
+ dest='ap_password', metavar='PASSWORD',
+ help='Multiple-system operator account password. If this option is left out, hypervideo will ask interactively.')
+ adobe_pass.add_option(
+ '--ap-list-mso',
+ action='store_true', dest='ap_list_mso', default=False,
+ help='List all supported multiple-system operators')
+
+ video_format = optparse.OptionGroup(parser, 'Video Format Options')
+ video_format.add_option(
+ '-f', '--format',
+ action='store', dest='format', metavar='FORMAT', default=None,
+ help='Video format code, see the "FORMAT SELECTION" for all the info')
+ video_format.add_option(
+ '--all-formats',
+ action='store_const', dest='format', const='all',
+ help='Download all available video formats')
+ video_format.add_option(
+ '--prefer-free-formats',
+ action='store_true', dest='prefer_free_formats', default=False,
+ help='Prefer free video formats unless a specific one is requested')
+ video_format.add_option(
+ '-F', '--list-formats',
+ action='store_true', dest='listformats',
+ help='List all available formats of requested videos')
+ video_format.add_option(
+ '--youtube-include-dash-manifest',
+ action='store_true', dest='youtube_include_dash_manifest', default=True,
+ help=optparse.SUPPRESS_HELP)
+ video_format.add_option(
+ '--youtube-skip-dash-manifest',
+ action='store_false', dest='youtube_include_dash_manifest',
+ help='Do not download the DASH manifests and related data on YouTube videos')
+ video_format.add_option(
+ '--merge-output-format',
+ action='store', dest='merge_output_format', metavar='FORMAT', default=None,
+ help=(
+ 'If a merge is required (e.g. bestvideo+bestaudio), '
+ 'output to given container format. One of mkv, mp4, ogg, webm, flv. '
+ 'Ignored if no merge is required'))
+
+ subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
+ subtitles.add_option(
+ '--write-sub', '--write-srt',
+ action='store_true', dest='writesubtitles', default=False,
+ help='Write subtitle file')
+ subtitles.add_option(
+ '--write-auto-sub', '--write-automatic-sub',
+ action='store_true', dest='writeautomaticsub', default=False,
+ help='Write automatically generated subtitle file (YouTube only)')
+ subtitles.add_option(
+ '--all-subs',
+ action='store_true', dest='allsubtitles', default=False,
+ help='Download all the available subtitles of the video')
+ subtitles.add_option(
+ '--list-subs',
+ action='store_true', dest='listsubtitles', default=False,
+ help='List all available subtitles for the video')
+ subtitles.add_option(
+ '--sub-format',
+ action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
+ help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"')
+ subtitles.add_option(
+ '--sub-lang', '--sub-langs', '--srt-lang',
+ action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
+ default=[], callback=_comma_separated_values_options_callback,
+ help='Languages of the subtitles to download (optional) separated by commas, use --list-subs for available language tags')
+
+ downloader = optparse.OptionGroup(parser, 'Download Options')
+ downloader.add_option(
+ '-r', '--limit-rate', '--rate-limit',
+ dest='ratelimit', metavar='RATE',
+ help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
+ downloader.add_option(
+ '-R', '--retries',
+ dest='retries', metavar='RETRIES', default=10,
+ help='Number of retries (default is %default), or "infinite".')
+ downloader.add_option(
+ '--fragment-retries',
+ dest='fragment_retries', metavar='RETRIES', default=10,
+ help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)')
+ downloader.add_option(
+ '--skip-unavailable-fragments',
+ action='store_true', dest='skip_unavailable_fragments', default=True,
+ help='Skip unavailable fragments (DASH, hlsnative and ISM)')
+ downloader.add_option(
+ '--abort-on-unavailable-fragment',
+ action='store_false', dest='skip_unavailable_fragments',
+ help='Abort downloading when some fragment is not available')
+ downloader.add_option(
+ '--keep-fragments',
+ action='store_true', dest='keep_fragments', default=False,
+ help='Keep downloaded fragments on disk after downloading is finished; fragments are erased by default')
+ downloader.add_option(
+ '--buffer-size',
+ dest='buffersize', metavar='SIZE', default='1024',
+ help='Size of download buffer (e.g. 1024 or 16K) (default is %default)')
+ downloader.add_option(
+ '--no-resize-buffer',
+ action='store_true', dest='noresizebuffer', default=False,
+ help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
+ downloader.add_option(
+ '--http-chunk-size',
+ dest='http_chunk_size', metavar='SIZE', default=None,
+ help='Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). '
+ 'May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)')
+ downloader.add_option(
+ '--test',
+ action='store_true', dest='test', default=False,
+ help=optparse.SUPPRESS_HELP)
+ downloader.add_option(
+ '--playlist-reverse',
+ action='store_true',
+ help='Download playlist videos in reverse order')
+ downloader.add_option(
+ '--playlist-random',
+ action='store_true',
+ help='Download playlist videos in random order')
+ downloader.add_option(
+ '--xattr-set-filesize',
+ dest='xattr_set_filesize', action='store_true',
+ help='Set file xattribute ytdl.filesize with expected file size')
+ downloader.add_option(
+ '--hls-prefer-native',
+ dest='hls_prefer_native', action='store_true', default=None,
+ help='Use the native HLS downloader instead of ffmpeg')
+ downloader.add_option(
+ '--hls-prefer-ffmpeg',
+ dest='hls_prefer_native', action='store_false', default=None,
+ help='Use ffmpeg instead of the native HLS downloader')
+ downloader.add_option(
+ '--hls-use-mpegts',
+ dest='hls_use_mpegts', action='store_true',
+        help='Use the mpegts container for HLS videos, allowing the video to '
+             'be played while downloading (some players may not be able to play it)')
+ downloader.add_option(
+ '--external-downloader',
+ dest='external_downloader', metavar='COMMAND',
+ help='Use the specified external downloader. '
+ 'Currently supports %s' % ','.join(list_external_downloaders()))
+ downloader.add_option(
+ '--external-downloader-args',
+ dest='external_downloader_args', metavar='ARGS',
+ help='Give these arguments to the external downloader')
+
+ workarounds = optparse.OptionGroup(parser, 'Workarounds')
+ workarounds.add_option(
+ '--encoding',
+ dest='encoding', metavar='ENCODING',
+ help='Force the specified encoding (experimental)')
+ workarounds.add_option(
+ '--no-check-certificate',
+ action='store_true', dest='no_check_certificate', default=False,
+ help='Suppress HTTPS certificate validation')
+ workarounds.add_option(
+ '--prefer-insecure',
+ '--prefer-unsecure', action='store_true', dest='prefer_insecure',
+ help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
+ workarounds.add_option(
+ '--user-agent',
+ metavar='UA', dest='user_agent',
+ help='Specify a custom user agent')
+ workarounds.add_option(
+ '--referer',
+ metavar='URL', dest='referer', default=None,
+ help='Specify a custom referer, use if the video access is restricted to one domain',
+ )
+ workarounds.add_option(
+ '--add-header',
+ metavar='FIELD:VALUE', dest='headers', action='append',
+ help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
+ )
+ workarounds.add_option(
+ '--bidi-workaround',
+ dest='bidi_workaround', action='store_true',
+ help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
+ workarounds.add_option(
+ '--sleep-interval', '--min-sleep-interval', metavar='SECONDS',
+ dest='sleep_interval', type=float,
+ help=(
+ 'Number of seconds to sleep before each download when used alone '
+ 'or a lower bound of a range for randomized sleep before each download '
+ '(minimum possible number of seconds to sleep) when used along with '
+ '--max-sleep-interval.'))
+ workarounds.add_option(
+ '--max-sleep-interval', metavar='SECONDS',
+ dest='max_sleep_interval', type=float,
+ help=(
+ 'Upper bound of a range for randomized sleep before each download '
+ '(maximum possible number of seconds to sleep). Must only be used '
+ 'along with --min-sleep-interval.'))
+
+ verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
+ verbosity.add_option(
+ '-q', '--quiet',
+ action='store_true', dest='quiet', default=False,
+ help='Activate quiet mode')
+ verbosity.add_option(
+ '--no-warnings',
+ dest='no_warnings', action='store_true', default=False,
+ help='Ignore warnings')
+ verbosity.add_option(
+ '-s', '--simulate',
+ action='store_true', dest='simulate', default=False,
+ help='Do not download the video and do not write anything to disk')
+ verbosity.add_option(
+ '--skip-download',
+ action='store_true', dest='skip_download', default=False,
+ help='Do not download the video')
+ verbosity.add_option(
+ '-g', '--get-url',
+ action='store_true', dest='geturl', default=False,
+ help='Simulate, quiet but print URL')
+ verbosity.add_option(
+ '-e', '--get-title',
+ action='store_true', dest='gettitle', default=False,
+ help='Simulate, quiet but print title')
+ verbosity.add_option(
+ '--get-id',
+ action='store_true', dest='getid', default=False,
+ help='Simulate, quiet but print id')
+ verbosity.add_option(
+ '--get-thumbnail',
+ action='store_true', dest='getthumbnail', default=False,
+ help='Simulate, quiet but print thumbnail URL')
+ verbosity.add_option(
+ '--get-description',
+ action='store_true', dest='getdescription', default=False,
+ help='Simulate, quiet but print video description')
+ verbosity.add_option(
+ '--get-duration',
+ action='store_true', dest='getduration', default=False,
+ help='Simulate, quiet but print video length')
+ verbosity.add_option(
+ '--get-filename',
+ action='store_true', dest='getfilename', default=False,
+ help='Simulate, quiet but print output filename')
+ verbosity.add_option(
+ '--get-format',
+ action='store_true', dest='getformat', default=False,
+ help='Simulate, quiet but print output format')
+ verbosity.add_option(
+ '-j', '--dump-json',
+ action='store_true', dest='dumpjson', default=False,
+ help='Simulate, quiet but print JSON information. See the "OUTPUT TEMPLATE" for a description of available keys.')
+ verbosity.add_option(
+ '-J', '--dump-single-json',
+ action='store_true', dest='dump_single_json', default=False,
+ help='Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
+ verbosity.add_option(
+ '--print-json',
+ action='store_true', dest='print_json', default=False,
+ help='Be quiet and print the video information as JSON (video is still being downloaded).',
+ )
+ verbosity.add_option(
+ '--newline',
+ action='store_true', dest='progress_with_newline', default=False,
+ help='Output progress bar as new lines')
+ verbosity.add_option(
+ '--no-progress',
+ action='store_true', dest='noprogress', default=False,
+ help='Do not print progress bar')
+ verbosity.add_option(
+ '--console-title',
+ action='store_true', dest='consoletitle', default=False,
+ help='Display progress in console titlebar')
+ verbosity.add_option(
+ '-v', '--verbose',
+ action='store_true', dest='verbose', default=False,
+ help='Print various debugging information')
+ verbosity.add_option(
+ '--dump-pages', '--dump-intermediate-pages',
+ action='store_true', dest='dump_intermediate_pages', default=False,
+ help='Print downloaded pages encoded using base64 to debug problems (very verbose)')
+ verbosity.add_option(
+ '--write-pages',
+ action='store_true', dest='write_pages', default=False,
+ help='Write downloaded intermediary pages to files in the current directory to debug problems')
+ verbosity.add_option(
+ '--youtube-print-sig-code',
+ action='store_true', dest='youtube_print_sig_code', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--print-traffic', '--dump-headers',
+ dest='debug_printtraffic', action='store_true', default=False,
+ help='Display sent and read HTTP traffic')
+ verbosity.add_option(
+ '-C', '--call-home',
+ dest='call_home', action='store_true', default=False,
+ help='Contact the hypervideo server for debugging')
+ verbosity.add_option(
+ '--no-call-home',
+ dest='call_home', action='store_false', default=False,
+ help='Do NOT contact the hypervideo server for debugging')
+
+ filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
+ filesystem.add_option(
+ '-a', '--batch-file',
+ dest='batchfile', metavar='FILE',
+ help="File containing URLs to download ('-' for stdin), one URL per line. "
+ "Lines starting with '#', ';' or ']' are considered as comments and ignored.")
+ filesystem.add_option(
+ '--id', default=False,
+ action='store_true', dest='useid', help='Use only video ID in file name')
+ filesystem.add_option(
+ '-o', '--output',
+ dest='outtmpl', metavar='TEMPLATE',
+ help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info'))
+ filesystem.add_option(
+ '--output-na-placeholder',
+ dest='outtmpl_na_placeholder', metavar='PLACEHOLDER', default='NA',
+ help=('Placeholder value for unavailable meta fields in output filename template (default is "%default")'))
+ filesystem.add_option(
+ '--autonumber-size',
+ dest='autonumber_size', metavar='NUMBER', type=int,
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '--autonumber-start',
+ dest='autonumber_start', metavar='NUMBER', default=1, type=int,
+ help='Specify the start value for %(autonumber)s (default is %default)')
+ filesystem.add_option(
+ '--restrict-filenames',
+ action='store_true', dest='restrictfilenames', default=False,
+ help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames')
+ filesystem.add_option(
+ '-A', '--auto-number',
+ action='store_true', dest='autonumber', default=False,
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '-t', '--title',
+ action='store_true', dest='usetitle', default=False,
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '-l', '--literal', default=False,
+ action='store_true', dest='usetitle',
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '-w', '--no-overwrites',
+ action='store_true', dest='nooverwrites', default=False,
+ help='Do not overwrite files')
+ filesystem.add_option(
+ '-c', '--continue',
+ action='store_true', dest='continue_dl', default=True,
+ help='Force resume of partially downloaded files. By default, hypervideo will resume downloads if possible.')
+ filesystem.add_option(
+ '--no-continue',
+ action='store_false', dest='continue_dl',
+ help='Do not resume partially downloaded files (restart from beginning)')
+ filesystem.add_option(
+ '--no-part',
+ action='store_true', dest='nopart', default=False,
+ help='Do not use .part files - write directly into output file')
+ filesystem.add_option(
+ '--no-mtime',
+ action='store_false', dest='updatetime', default=True,
+ help='Do not use the Last-modified header to set the file modification time')
+ filesystem.add_option(
+ '--write-description',
+ action='store_true', dest='writedescription', default=False,
+ help='Write video description to a .description file')
+ filesystem.add_option(
+ '--write-info-json',
+ action='store_true', dest='writeinfojson', default=False,
+ help='Write video metadata to a .info.json file')
+ filesystem.add_option(
+ '--write-annotations',
+ action='store_true', dest='writeannotations', default=False,
+ help='Write video annotations to a .annotations.xml file')
+ filesystem.add_option(
+ '--load-info-json', '--load-info',
+ dest='load_info_filename', metavar='FILE',
+ help='JSON file containing the video information (created with the "--write-info-json" option)')
+ filesystem.add_option(
+ '--cookies',
+ dest='cookiefile', metavar='FILE',
+ help='File to read cookies from and dump cookie jar in')
+ filesystem.add_option(
+ '--cache-dir', dest='cachedir', default=None, metavar='DIR',
+        help='Location in the filesystem where hypervideo can store some downloaded information permanently. By default $XDG_CACHE_HOME/hypervideo or ~/.cache/hypervideo. At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
+ filesystem.add_option(
+ '--no-cache-dir', action='store_const', const=False, dest='cachedir',
+ help='Disable filesystem caching')
+ filesystem.add_option(
+ '--rm-cache-dir',
+ action='store_true', dest='rm_cachedir',
+ help='Delete all filesystem cache files')
+
+ thumbnail = optparse.OptionGroup(parser, 'Thumbnail Options')
+ thumbnail.add_option(
+ '--write-thumbnail',
+ action='store_true', dest='writethumbnail', default=False,
+ help='Write thumbnail image to disk')
+ thumbnail.add_option(
+ '--write-all-thumbnails',
+ action='store_true', dest='write_all_thumbnails', default=False,
+ help='Write all thumbnail image formats to disk')
+ thumbnail.add_option(
+ '--list-thumbnails',
+ action='store_true', dest='list_thumbnails', default=False,
+ help='Simulate and list all available thumbnail formats')
+
+ postproc = optparse.OptionGroup(parser, 'Post-processing Options')
+ postproc.add_option(
+ '-x', '--extract-audio',
+ action='store_true', dest='extractaudio', default=False,
+ help='Convert video files to audio-only files (requires ffmpeg/avconv and ffprobe/avprobe)')
+ postproc.add_option(
+ '--audio-format', metavar='FORMAT', dest='audioformat', default='best',
+ help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')
+ postproc.add_option(
+ '--audio-quality', metavar='QUALITY',
+ dest='audioquality', default='5',
+ help='Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
+ postproc.add_option(
+ '--recode-video',
+ metavar='FORMAT', dest='recodevideo', default=None,
+ help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)')
+ postproc.add_option(
+ '--postprocessor-args',
+ dest='postprocessor_args', metavar='ARGS',
+ help='Give these arguments to the postprocessor')
+ postproc.add_option(
+ '-k', '--keep-video',
+ action='store_true', dest='keepvideo', default=False,
+ help='Keep the video file on disk after the post-processing; the video is erased by default')
+ postproc.add_option(
+ '--no-post-overwrites',
+ action='store_true', dest='nopostoverwrites', default=False,
+ help='Do not overwrite post-processed files; the post-processed files are overwritten by default')
+ postproc.add_option(
+ '--embed-subs',
+ action='store_true', dest='embedsubtitles', default=False,
+ help='Embed subtitles in the video (only for mp4, webm and mkv videos)')
+ postproc.add_option(
+ '--embed-thumbnail',
+ action='store_true', dest='embedthumbnail', default=False,
+ help='Embed thumbnail in the audio as cover art')
+ postproc.add_option(
+ '--add-metadata',
+ action='store_true', dest='addmetadata', default=False,
+ help='Write metadata to the video file')
+ postproc.add_option(
+ '--metadata-from-title',
+ metavar='FORMAT', dest='metafromtitle',
+ help='Parse additional metadata like song title / artist from the video title. '
+ 'The format syntax is the same as --output. Regular expression with '
+ 'named capture groups may also be used. '
+ 'The parsed parameters replace existing values. '
+ 'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
+ '"Coldplay - Paradise". '
+ 'Example (regex): --metadata-from-title "(?P<artist>.+?) - (?P<title>.+)"')
+ postproc.add_option(
+ '--xattrs',
+ action='store_true', dest='xattrs', default=False,
+ help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
+ postproc.add_option(
+ '--fixup',
+ metavar='POLICY', dest='fixup', default='detect_or_warn',
+ help='Automatically correct known faults of the file. '
+ 'One of never (do nothing), warn (only emit a warning), '
+ 'detect_or_warn (the default; fix file if we can, warn otherwise)')
+ postproc.add_option(
+ '--prefer-avconv',
+ action='store_false', dest='prefer_ffmpeg',
+ help='Prefer avconv over ffmpeg for running the postprocessors')
+ postproc.add_option(
+ '--prefer-ffmpeg',
+ action='store_true', dest='prefer_ffmpeg',
+ help='Prefer ffmpeg over avconv for running the postprocessors (default)')
+ postproc.add_option(
+ '--ffmpeg-location', '--avconv-location', metavar='PATH',
+ dest='ffmpeg_location',
+ help='Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.')
+ postproc.add_option(
+ '--exec',
+ metavar='CMD', dest='exec_cmd',
+ help='Execute a command on the file after downloading and post-processing, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
+ postproc.add_option(
+ '--convert-subs', '--convert-subtitles',
+ metavar='FORMAT', dest='convertsubtitles', default=None,
+ help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)')
+
+ parser.add_option_group(general)
+ parser.add_option_group(network)
+ parser.add_option_group(geo)
+ parser.add_option_group(selection)
+ parser.add_option_group(downloader)
+ parser.add_option_group(filesystem)
+ parser.add_option_group(thumbnail)
+ parser.add_option_group(verbosity)
+ parser.add_option_group(workarounds)
+ parser.add_option_group(video_format)
+ parser.add_option_group(subtitles)
+ parser.add_option_group(authentication)
+ parser.add_option_group(adobe_pass)
+ parser.add_option_group(postproc)
+
+ if overrideArguments is not None:
+ opts, args = parser.parse_args(overrideArguments)
+ if opts.verbose:
+ write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
+ else:
+ def compat_conf(conf):
+ if sys.version_info < (3,):
+ return [a.decode(preferredencoding(), 'replace') for a in conf]
+ return conf
+
+ command_line_conf = compat_conf(sys.argv[1:])
+ opts, args = parser.parse_args(command_line_conf)
+
+ system_conf = user_conf = custom_conf = []
+
+ if '--config-location' in command_line_conf:
+ location = compat_expanduser(opts.config_location)
+ if os.path.isdir(location):
+ location = os.path.join(location, 'hypervideo.conf')
+ if not os.path.exists(location):
+ parser.error('config-location %s does not exist.' % location)
+ custom_conf = _readOptions(location)
+ elif '--ignore-config' in command_line_conf:
+ pass
+ else:
+ system_conf = _readOptions('/etc/hypervideo.conf')
+ if '--ignore-config' not in system_conf:
+ user_conf = _readUserConf()
+
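+        # Precedence sketch: optparse keeps the last value seen for plain
+        # 'store' options, so command-line arguments override the custom,
+        # user and system configuration files, in that order.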
+ argv = system_conf + user_conf + custom_conf + command_line_conf
+ opts, args = parser.parse_args(argv)
+ if opts.verbose:
+ for conf_label, conf in (
+ ('System config', system_conf),
+ ('User config', user_conf),
+ ('Custom config', custom_conf),
+ ('Command-line args', command_line_conf)):
+ write_string('[debug] %s: %s\n' % (conf_label, repr(_hide_login_info(conf))))
+
+ return parser, opts, args
diff --git a/hypervideo_dl/postprocessor/__init__.py b/hypervideo_dl/postprocessor/__init__.py
new file mode 100644
index 0000000..3ea5183
--- /dev/null
+++ b/hypervideo_dl/postprocessor/__init__.py
@@ -0,0 +1,40 @@
+from __future__ import unicode_literals
+
+from .embedthumbnail import EmbedThumbnailPP
+from .ffmpeg import (
+ FFmpegPostProcessor,
+ FFmpegEmbedSubtitlePP,
+ FFmpegExtractAudioPP,
+ FFmpegFixupStretchedPP,
+ FFmpegFixupM3u8PP,
+ FFmpegFixupM4aPP,
+ FFmpegMergerPP,
+ FFmpegMetadataPP,
+ FFmpegVideoConvertorPP,
+ FFmpegSubtitlesConvertorPP,
+)
+from .xattrpp import XAttrMetadataPP
+from .execafterdownload import ExecAfterDownloadPP
+from .metadatafromtitle import MetadataFromTitlePP
+
+
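+# Name-keyed lookup, e.g. get_postprocessor('FFmpegMetadata') returns the
+# FFmpegMetadataPP class imported above.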
+def get_postprocessor(key):
+ return globals()[key + 'PP']
+
+
+__all__ = [
+ 'EmbedThumbnailPP',
+ 'ExecAfterDownloadPP',
+ 'FFmpegEmbedSubtitlePP',
+ 'FFmpegExtractAudioPP',
+ 'FFmpegFixupM3u8PP',
+ 'FFmpegFixupM4aPP',
+ 'FFmpegFixupStretchedPP',
+ 'FFmpegMergerPP',
+ 'FFmpegMetadataPP',
+ 'FFmpegPostProcessor',
+ 'FFmpegSubtitlesConvertorPP',
+ 'FFmpegVideoConvertorPP',
+ 'MetadataFromTitlePP',
+ 'XAttrMetadataPP',
+]
diff --git a/hypervideo_dl/postprocessor/common.py b/hypervideo_dl/postprocessor/common.py
new file mode 100644
index 0000000..599dd1d
--- /dev/null
+++ b/hypervideo_dl/postprocessor/common.py
@@ -0,0 +1,69 @@
+from __future__ import unicode_literals
+
+import os
+
+from ..utils import (
+ PostProcessingError,
+ cli_configuration_args,
+ encodeFilename,
+)
+
+
+class PostProcessor(object):
+ """Post Processor class.
+
+ PostProcessor objects can be added to downloaders with their
+ add_post_processor() method. When the downloader has finished a
+ successful download, it will take its internal chain of PostProcessors
+ and start calling the run() method on each one of them, first with
+ an initial argument and then with the returned value of the previous
+ PostProcessor.
+
+ The chain will be stopped if one of them ever returns None or the end
+ of the chain is reached.
+
+ PostProcessor objects follow a "mutual registration" process similar
+ to InfoExtractor objects.
+
+    Optionally, a PostProcessor can make use of a list of additional
+    command-line arguments via self._configuration_args.
+ """
+
+ _downloader = None
+
+ def __init__(self, downloader=None):
+ self._downloader = downloader
+
+ def set_downloader(self, downloader):
+ """Sets the downloader for this PP."""
+ self._downloader = downloader
+
+ def run(self, information):
+ """Run the PostProcessor.
+
+ The "information" argument is a dictionary like the ones
+ composed by InfoExtractors. The only difference is that this
+ one has an extra field called "filepath" that points to the
+ downloaded file.
+
+        This method returns a tuple: the first element is a list of the files
+        that can be deleted, and the second is the updated information.
+
+ In addition, this method may raise a PostProcessingError
+ exception if post processing fails.
+ """
+ return [], information # by default, keep file and do nothing
+
+ def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'):
+ try:
+ os.utime(encodeFilename(path), (atime, mtime))
+ except Exception:
+ self._downloader.report_warning(errnote)
+
+ def _configuration_args(self, default=[]):
+ return cli_configuration_args(self._downloader.params, 'postprocessor_args', default)
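+
+
+# A minimal sketch of a post processor built on the contract above
+# (hypothetical class, for illustration only):
+#
+#   class TouchFilePP(PostProcessor):
+#       def run(self, information):
+#           self.try_utime(information['filepath'], 0, 0)
+#           return [], information  # delete nothing, pass the info dict on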
+
+
+class AudioConversionError(PostProcessingError):
+ pass
diff --git a/hypervideo_dl/postprocessor/embedthumbnail.py b/hypervideo_dl/postprocessor/embedthumbnail.py
new file mode 100644
index 0000000..3990908
--- /dev/null
+++ b/hypervideo_dl/postprocessor/embedthumbnail.py
@@ -0,0 +1,130 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+
+import os
+import subprocess
+
+from .ffmpeg import FFmpegPostProcessor
+
+from ..utils import (
+ check_executable,
+ encodeArgument,
+ encodeFilename,
+ PostProcessingError,
+ prepend_extension,
+ replace_extension,
+ shell_quote
+)
+
+
+class EmbedThumbnailPPError(PostProcessingError):
+ pass
+
+
+class EmbedThumbnailPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, already_have_thumbnail=False):
+ super(EmbedThumbnailPP, self).__init__(downloader)
+ self._already_have_thumbnail = already_have_thumbnail
+
+ def run(self, info):
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+
+ if not info.get('thumbnails'):
+ self._downloader.to_screen('[embedthumbnail] There aren\'t any thumbnails to embed')
+ return [], info
+
+ thumbnail_filename = info['thumbnails'][-1]['filename']
+
+ if not os.path.exists(encodeFilename(thumbnail_filename)):
+ self._downloader.report_warning(
+ 'Skipping embedding the thumbnail because the file is missing.')
+ return [], info
+
+ def is_webp(path):
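+            # WebP is a RIFF container: bytes 0-3 are 'RIFF', 4-7 the chunk
+            # size, and 8-11 the 'WEBP' form type, hence the 12-byte read.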
+ with open(encodeFilename(path), 'rb') as f:
+ b = f.read(12)
+ return b[0:4] == b'RIFF' and b[8:] == b'WEBP'
+
+ # Correct extension for WebP file with wrong extension (see #25687, #25717)
+ _, thumbnail_ext = os.path.splitext(thumbnail_filename)
+ if thumbnail_ext:
+ thumbnail_ext = thumbnail_ext[1:].lower()
+ if thumbnail_ext != 'webp' and is_webp(thumbnail_filename):
+ self._downloader.to_screen(
+ '[ffmpeg] Correcting extension to webp and escaping path for thumbnail "%s"' % thumbnail_filename)
+ thumbnail_webp_filename = replace_extension(thumbnail_filename, 'webp')
+ os.rename(encodeFilename(thumbnail_filename), encodeFilename(thumbnail_webp_filename))
+ thumbnail_filename = thumbnail_webp_filename
+ thumbnail_ext = 'webp'
+
+ # Convert unsupported thumbnail formats to JPEG (see #25687, #25717)
+ if thumbnail_ext not in ['jpg', 'png']:
+ # NB: % is supposed to be escaped with %% but this does not work
+ # for input files so working around with standard substitution
+ escaped_thumbnail_filename = thumbnail_filename.replace('%', '#')
+ os.rename(encodeFilename(thumbnail_filename), encodeFilename(escaped_thumbnail_filename))
+ escaped_thumbnail_jpg_filename = replace_extension(escaped_thumbnail_filename, 'jpg')
+ self._downloader.to_screen('[ffmpeg] Converting thumbnail "%s" to JPEG' % escaped_thumbnail_filename)
+ self.run_ffmpeg(escaped_thumbnail_filename, escaped_thumbnail_jpg_filename, ['-bsf:v', 'mjpeg2jpeg'])
+ os.remove(encodeFilename(escaped_thumbnail_filename))
+ thumbnail_jpg_filename = replace_extension(thumbnail_filename, 'jpg')
+ # Rename back to unescaped for further processing
+ os.rename(encodeFilename(escaped_thumbnail_jpg_filename), encodeFilename(thumbnail_jpg_filename))
+ thumbnail_filename = thumbnail_jpg_filename
+
+ if info['ext'] == 'mp3':
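+            # Include every stream from both inputs (0: audio, 1: image) and
+            # tag the image stream so players treat it as front-cover art.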
+ options = [
+ '-c', 'copy', '-map', '0', '-map', '1',
+ '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"']
+
+ self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename)
+
+ self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
+
+ if not self._already_have_thumbnail:
+ os.remove(encodeFilename(thumbnail_filename))
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+ elif info['ext'] in ['m4a', 'mp4']:
+            atomicparsley = next(
+                (x for x in ['AtomicParsley', 'atomicparsley']
+                 if check_executable(x, ['-v'])),
+                None)
+
+ if atomicparsley is None:
+                raise EmbedThumbnailPPError('AtomicParsley was not found. Please install it.')
+
+ cmd = [encodeFilename(atomicparsley, True),
+ encodeFilename(filename, True),
+ encodeArgument('--artwork'),
+ encodeFilename(thumbnail_filename, True),
+ encodeArgument('-o'),
+ encodeFilename(temp_filename, True)]
+
+ self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
+
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
+
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = p.communicate()
+
+ if p.returncode != 0:
+ msg = stderr.decode('utf-8', 'replace').strip()
+ raise EmbedThumbnailPPError(msg)
+
+ if not self._already_have_thumbnail:
+ os.remove(encodeFilename(thumbnail_filename))
+                # for formats that don't support thumbnails (like 3gp)
+                # AtomicParsley does not write the temporary file
+ if b'No changes' in stdout:
+ self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail')
+ else:
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ else:
+ raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
+
+ return [], info
diff --git a/hypervideo_dl/postprocessor/execafterdownload.py b/hypervideo_dl/postprocessor/execafterdownload.py
new file mode 100644
index 0000000..64dabe7
--- /dev/null
+++ b/hypervideo_dl/postprocessor/execafterdownload.py
@@ -0,0 +1,31 @@
+from __future__ import unicode_literals
+
+import subprocess
+
+from .common import PostProcessor
+from ..compat import compat_shlex_quote
+from ..utils import (
+ encodeArgument,
+ PostProcessingError,
+)
+
+
+class ExecAfterDownloadPP(PostProcessor):
+ def __init__(self, downloader, exec_cmd):
+ super(ExecAfterDownloadPP, self).__init__(downloader)
+ self.exec_cmd = exec_cmd
+
+ def run(self, information):
+ cmd = self.exec_cmd
+ if '{}' not in cmd:
+ cmd += ' {}'
+
+ cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
+
+ self._downloader.to_screen('[exec] Executing command: %s' % cmd)
+ retCode = subprocess.call(encodeArgument(cmd), shell=True)
+ if retCode != 0:
+ raise PostProcessingError(
+ 'Command returned error code %d' % retCode)
+
+ return [], information
diff --git a/hypervideo_dl/postprocessor/ffmpeg.py b/hypervideo_dl/postprocessor/ffmpeg.py
new file mode 100644
index 0000000..3078329
--- /dev/null
+++ b/hypervideo_dl/postprocessor/ffmpeg.py
@@ -0,0 +1,657 @@
+from __future__ import unicode_literals
+
+import io
+import os
+import subprocess
+import time
+import re
+
+
+from .common import AudioConversionError, PostProcessor
+
+from ..utils import (
+ encodeArgument,
+ encodeFilename,
+ get_exe_version,
+ is_outdated_version,
+ PostProcessingError,
+ prepend_extension,
+ shell_quote,
+ subtitles_filename,
+ dfxp2srt,
+ ISO639Utils,
+ replace_extension,
+)
+
+
+EXT_TO_OUT_FORMATS = {
+ 'aac': 'adts',
+ 'flac': 'flac',
+ 'm4a': 'ipod',
+ 'mka': 'matroska',
+ 'mkv': 'matroska',
+ 'mpg': 'mpeg',
+ 'ogv': 'ogg',
+ 'ts': 'mpegts',
+ 'wma': 'asf',
+ 'wmv': 'asf',
+}
+ACODECS = {
+ 'mp3': 'libmp3lame',
+ 'aac': 'aac',
+ 'flac': 'flac',
+ 'm4a': 'aac',
+ 'opus': 'libopus',
+ 'vorbis': 'libvorbis',
+ 'wav': None,
+}
+
+
+class FFmpegPostProcessorError(PostProcessingError):
+ pass
+
+
+class FFmpegPostProcessor(PostProcessor):
+ def __init__(self, downloader=None):
+ PostProcessor.__init__(self, downloader)
+ self._determine_executables()
+
+ def check_version(self):
+ if not self.available:
+ raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
+
+ required_version = '10-0' if self.basename == 'avconv' else '1.0'
+ if is_outdated_version(
+ self._versions[self.basename], required_version):
+ warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
+ self.basename, self.basename, required_version)
+ if self._downloader:
+ self._downloader.report_warning(warning)
+
+ @staticmethod
+ def get_versions(downloader=None):
+ return FFmpegPostProcessor(downloader)._versions
+
+ def _determine_executables(self):
+ programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
+ prefer_ffmpeg = True
+
+ def get_ffmpeg_version(path):
+ ver = get_exe_version(path, args=['-version'])
+ if ver:
+ regexs = [
+ r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1]
+ r'n([0-9.]+)$', # Arch Linux
+ # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
+ ]
+ for regex in regexs:
+ mobj = re.match(regex, ver)
+ if mobj:
+ ver = mobj.group(1)
+ return ver
+
+ self.basename = None
+ self.probe_basename = None
+
+ self._paths = None
+ self._versions = None
+ if self._downloader:
+ prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True)
+ location = self._downloader.params.get('ffmpeg_location')
+ if location is not None:
+ if not os.path.exists(location):
+ self._downloader.report_warning(
+ 'ffmpeg-location %s does not exist! '
+ 'Continuing without avconv/ffmpeg.' % (location))
+ self._versions = {}
+ return
+ elif not os.path.isdir(location):
+ basename = os.path.splitext(os.path.basename(location))[0]
+ if basename not in programs:
+ self._downloader.report_warning(
+ 'Cannot identify executable %s, its basename should be one of %s. '
+ 'Continuing without avconv/ffmpeg.' %
+ (location, ', '.join(programs)))
+ self._versions = {}
+ return None
+ location = os.path.dirname(os.path.abspath(location))
+ if basename in ('ffmpeg', 'ffprobe'):
+ prefer_ffmpeg = True
+
+ self._paths = dict(
+ (p, os.path.join(location, p)) for p in programs)
+ self._versions = dict(
+ (p, get_ffmpeg_version(self._paths[p])) for p in programs)
+ if self._versions is None:
+ self._versions = dict(
+ (p, get_ffmpeg_version(p)) for p in programs)
+ self._paths = dict((p, p) for p in programs)
+
+ if prefer_ffmpeg is False:
+ prefs = ('avconv', 'ffmpeg')
+ else:
+ prefs = ('ffmpeg', 'avconv')
+ for p in prefs:
+ if self._versions[p]:
+ self.basename = p
+ break
+
+ if prefer_ffmpeg is False:
+ prefs = ('avprobe', 'ffprobe')
+ else:
+ prefs = ('ffprobe', 'avprobe')
+ for p in prefs:
+ if self._versions[p]:
+ self.probe_basename = p
+ break
+
+ @property
+ def available(self):
+ return self.basename is not None
+
+ @property
+ def executable(self):
+ return self._paths[self.basename]
+
+ @property
+ def probe_available(self):
+ return self.probe_basename is not None
+
+ @property
+ def probe_executable(self):
+ return self._paths[self.probe_basename]
+
+ def get_audio_codec(self, path):
+ if not self.probe_available and not self.available:
+ raise PostProcessingError('ffprobe/avprobe and ffmpeg/avconv not found. Please install one.')
+ try:
+ if self.probe_available:
+ cmd = [
+ encodeFilename(self.probe_executable, True),
+ encodeArgument('-show_streams')]
+ else:
+ cmd = [
+ encodeFilename(self.executable, True),
+ encodeArgument('-i')]
+ cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen(
+ '[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
+ handle = subprocess.Popen(
+ cmd, stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE, stdin=subprocess.PIPE)
+ stdout_data, stderr_data = handle.communicate()
+ expected_ret = 0 if self.probe_available else 1
+ if handle.wait() != expected_ret:
+ return None
+ except (IOError, OSError):
+ return None
+ output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore')
+ if self.probe_available:
+ audio_codec = None
+ for line in output.split('\n'):
+ if line.startswith('codec_name='):
+ audio_codec = line.split('=')[1].strip()
+ elif line.strip() == 'codec_type=audio' and audio_codec is not None:
+ return audio_codec
+ else:
+ # Stream #FILE_INDEX:STREAM_INDEX[STREAM_ID](LANGUAGE): CODEC_TYPE: CODEC_NAME
+ mobj = re.search(
+ r'Stream\s*#\d+:\d+(?:\[0x[0-9a-f]+\])?(?:\([a-z]{3}\))?:\s*Audio:\s*([0-9a-z]+)',
+ output)
+ if mobj:
+ return mobj.group(1)
+ return None
+
+ def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
+ self.check_version()
+
+ oldest_mtime = min(
+ os.stat(encodeFilename(path)).st_mtime for path in input_paths)
+
+ opts += self._configuration_args()
+
+ files_cmd = []
+ for path in input_paths:
+ files_cmd.extend([
+ encodeArgument('-i'),
+ encodeFilename(self._ffmpeg_filename_argument(path), True)
+ ])
+ cmd = [encodeFilename(self.executable, True), encodeArgument('-y')]
+ # avconv does not support '-loglevel repeat'
+ if self.basename == 'ffmpeg':
+ cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]
+ cmd += (files_cmd
+ + [encodeArgument(o) for o in opts]
+ + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
+
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ stdout, stderr = p.communicate()
+ if p.returncode != 0:
+ stderr = stderr.decode('utf-8', 'replace')
+ msg = stderr.strip().split('\n')[-1]
+ raise FFmpegPostProcessorError(msg)
+ self.try_utime(out_path, oldest_mtime, oldest_mtime)
+
+ def run_ffmpeg(self, path, out_path, opts):
+ self.run_ffmpeg_multiple_files([path], out_path, opts)
+
+ def _ffmpeg_filename_argument(self, fn):
+ # Always use 'file:' because the filename may contain ':' (ffmpeg
+ # interprets that as a protocol) or can start with '-' (-- is broken in
+ # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
+ # Also leave '-' intact in order not to break streaming to stdout.
+ return 'file:' + fn if fn != '-' else fn
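+ # Illustrative behaviour: _ffmpeg_filename_argument('-odd:name.mp4')
+ # returns 'file:-odd:name.mp4', while a bare '-' is passed through so
+ # streaming to stdout keeps working.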
+
+
+class FFmpegExtractAudioPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
+ FFmpegPostProcessor.__init__(self, downloader)
+ if preferredcodec is None:
+ preferredcodec = 'best'
+ self._preferredcodec = preferredcodec
+ self._preferredquality = preferredquality
+ self._nopostoverwrites = nopostoverwrites
+
+ def run_ffmpeg(self, path, out_path, codec, more_opts):
+ if codec is None:
+ acodec_opts = []
+ else:
+ acodec_opts = ['-acodec', codec]
+ opts = ['-vn'] + acodec_opts + more_opts
+ try:
+ FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
+ except FFmpegPostProcessorError as err:
+ raise AudioConversionError(err.msg)
+
+ def run(self, information):
+ path = information['filepath']
+
+ filecodec = self.get_audio_codec(path)
+ if filecodec is None:
+ raise PostProcessingError('Unable to obtain file audio codec with ffprobe')
+
+ more_opts = []
+ if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
+ if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
+ # Lossless, but in another container
+ acodec = 'copy'
+ extension = 'm4a'
+ more_opts = ['-bsf:a', 'aac_adtstoasc']
+ elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
+ # Lossless if possible
+ acodec = 'copy'
+ extension = filecodec
+ if filecodec == 'aac':
+ more_opts = ['-f', 'adts']
+ if filecodec == 'vorbis':
+ extension = 'ogg'
+ else:
+ # MP3 otherwise.
+ acodec = 'libmp3lame'
+ extension = 'mp3'
+ more_opts = []
+ if self._preferredquality is not None:
+ if int(self._preferredquality) < 10:
+ more_opts += ['-q:a', self._preferredquality]
+ else:
+ more_opts += ['-b:a', self._preferredquality + 'k']
+ else:
+ # We convert the audio (lossy if codec is lossy)
+ acodec = ACODECS[self._preferredcodec]
+ extension = self._preferredcodec
+ more_opts = []
+ if self._preferredquality is not None:
+ # The opus codec doesn't support the -aq option
+ if int(self._preferredquality) < 10 and extension != 'opus':
+ more_opts += ['-q:a', self._preferredquality]
+ else:
+ more_opts += ['-b:a', self._preferredquality + 'k']
+ if self._preferredcodec == 'aac':
+ more_opts += ['-f', 'adts']
+ if self._preferredcodec == 'm4a':
+ more_opts += ['-bsf:a', 'aac_adtstoasc']
+ if self._preferredcodec == 'vorbis':
+ extension = 'ogg'
+ if self._preferredcodec == 'wav':
+ extension = 'wav'
+ more_opts += ['-f', 'wav']
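+ # Worked example: an aac source with preferred codec 'm4a' takes the
+ # first branch above and ends up with acodec='copy', extension='m4a'
+ # and more_opts=['-bsf:a', 'aac_adtstoasc'], i.e. a remux, not a re-encode.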
+
+ prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups
+ new_path = prefix + sep + extension
+
+ information['filepath'] = new_path
+ information['ext'] = extension
+
+ # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
+ if (new_path == path
+ or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
+ self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path)
+ return [], information
+
+ try:
+ self._downloader.to_screen('[ffmpeg] Destination: ' + new_path)
+ self.run_ffmpeg(path, new_path, acodec, more_opts)
+ except AudioConversionError as e:
+ raise PostProcessingError(
+ 'audio conversion failed: ' + e.msg)
+ except Exception:
+ raise PostProcessingError('error running ' + self.basename)
+
+ # Try to update the date time for extracted audio file.
+ if information.get('filetime') is not None:
+ self.try_utime(
+ new_path, time.time(), information['filetime'],
+ errnote='Cannot update utime of audio file')
+
+ return [path], information
+
+
+class FFmpegVideoConvertorPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, preferedformat=None):
+ super(FFmpegVideoConvertorPP, self).__init__(downloader)
+ self._preferedformat = preferedformat
+
+ def run(self, information):
+ path = information['filepath']
+ if information['ext'] == self._preferedformat:
+ self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
+ return [], information
+ options = []
+ if self._preferedformat == 'avi':
+ options.extend(['-c:v', 'libxvid', '-vtag', 'XVID'])
+ prefix, sep, ext = path.rpartition('.')
+ outpath = prefix + sep + self._preferedformat
+ self._downloader.to_screen('[ffmpeg] Converting video from %s to %s, Destination: %s' % (information['ext'], self._preferedformat, outpath))
+ self.run_ffmpeg(path, outpath, options)
+ information['filepath'] = outpath
+ information['format'] = self._preferedformat
+ information['ext'] = self._preferedformat
+ return [path], information
+
+
+class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
+ def run(self, information):
+ if information['ext'] not in ('mp4', 'webm', 'mkv'):
+ self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files')
+ return [], information
+ subtitles = information.get('requested_subtitles')
+ if not subtitles:
+ self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
+ return [], information
+
+ filename = information['filepath']
+
+ ext = information['ext']
+ sub_langs = []
+ sub_filenames = []
+ webm_vtt_warn = False
+
+ for lang, sub_info in subtitles.items():
+ sub_ext = sub_info['ext']
+ if ext != 'webm' or (ext == 'webm' and sub_ext == 'vtt'):
+ sub_langs.append(lang)
+ sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
+ else:
+ if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
+ webm_vtt_warn = True
+ self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files')
+
+ if not sub_langs:
+ return [], information
+
+ input_files = [filename] + sub_filenames
+
+ opts = [
+ '-map', '0',
+ '-c', 'copy',
+ # Don't copy the existing subtitles, we may be running the
+ # postprocessor a second time
+ '-map', '-0:s',
+ # Don't copy Apple TV chapters track, bin_data (see #19042, #19024,
+ # https://trac.ffmpeg.org/ticket/6016)
+ '-map', '-0:d',
+ ]
+ if information['ext'] == 'mp4':
+ opts += ['-c:s', 'mov_text']
+ for (i, lang) in enumerate(sub_langs):
+ opts.extend(['-map', '%d:0' % (i + 1)])
+ lang_code = ISO639Utils.short2long(lang) or lang
+ opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
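+ # Roughly, embedding a single English .srt into an mp4 builds a command
+ # like the following (filenames illustrative):
+ #   ffmpeg -y -i file:v.mp4 -i file:v.en.srt -map 0 -c copy -map -0:s
+ #     -map -0:d -c:s mov_text -map 1:0 -metadata:s:s:0 language=eng file:v.temp.mp4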
+
+ temp_filename = prepend_extension(filename, 'temp')
+ self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
+ self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+ return sub_filenames, information
+
+
+class FFmpegMetadataPP(FFmpegPostProcessor):
+ def run(self, info):
+ metadata = {}
+
+ def add(meta_list, info_list=None):
+ if not info_list:
+ info_list = meta_list
+ if not isinstance(meta_list, (list, tuple)):
+ meta_list = (meta_list,)
+ if not isinstance(info_list, (list, tuple)):
+ info_list = (info_list,)
+ for info_f in info_list:
+ if info.get(info_f) is not None:
+ for meta_f in meta_list:
+ metadata[meta_f] = info[info_f]
+ break
+
+ # See [1-4] for some info on media metadata/metadata supported
+ # by ffmpeg.
+ # 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
+ # 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
+ # 3. https://kodi.wiki/view/Video_file_tagging
+ # 4. http://atomicparsley.sourceforge.net/mpeg-4files.html
+
+ add('title', ('track', 'title'))
+ add('date', 'upload_date')
+ add(('description', 'comment'), 'description')
+ add('purl', 'webpage_url')
+ add('track', 'track_number')
+ add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
+ add('genre')
+ add('album')
+ add('album_artist')
+ add('disc', 'disc_number')
+ add('show', 'series')
+ add('season_number')
+ add('episode_id', ('episode', 'episode_id'))
+ add('episode_sort', 'episode_number')
+
+ if not metadata:
+ self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
+ return [], info
+
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+ in_filenames = [filename]
+ options = []
+
+ if info['ext'] == 'm4a':
+ options.extend(['-vn', '-acodec', 'copy'])
+ else:
+ options.extend(['-c', 'copy'])
+
+ for (name, value) in metadata.items():
+ options.extend(['-metadata', '%s=%s' % (name, value)])
+
+ chapters = info.get('chapters', [])
+ if chapters:
+ metadata_filename = replace_extension(filename, 'meta')
+ with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
+ def ffmpeg_escape(text):
+ return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
+
+ metadata_file_content = ';FFMETADATA1\n'
+ for chapter in chapters:
+ metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n'
+ metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000)
+ metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000)
+ chapter_title = chapter.get('title')
+ if chapter_title:
+ metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title)
+ f.write(metadata_file_content)
+ in_filenames.append(metadata_filename)
+ options.extend(['-map_metadata', '1'])
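+ # The generated metadata file looks like this (illustrative values):
+ #   ;FFMETADATA1
+ #   [CHAPTER]
+ #   TIMEBASE=1/1000
+ #   START=0
+ #   END=60000
+ #   title=Intro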
+
+ self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
+ self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options)
+ if chapters:
+ os.remove(metadata_filename)
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ return [], info
+
+
+class FFmpegMergerPP(FFmpegPostProcessor):
+ def run(self, info):
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+ args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
+ self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
+ self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ return info['__files_to_merge'], info
+
+ def can_merge(self):
+ # TODO: figure out merge-capable ffmpeg version
+ if self.basename != 'avconv':
+ return True
+
+ required_version = '10-0'
+ if is_outdated_version(
+ self._versions[self.basename], required_version):
+ warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
+ 'hypervideo will download single file media. '
+ 'Update %s to version %s or newer to fix this.') % (
+ self.basename, self.basename, required_version)
+ if self._downloader:
+ self._downloader.report_warning(warning)
+ return False
+ return True
+
+
+class FFmpegFixupStretchedPP(FFmpegPostProcessor):
+ def run(self, info):
+ stretched_ratio = info.get('stretched_ratio')
+ if stretched_ratio is None or stretched_ratio == 1:
+ return [], info
+
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+
+ options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio]
+ self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename)
+ self.run_ffmpeg(filename, temp_filename, options)
+
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+ return [], info
+
+
+class FFmpegFixupM4aPP(FFmpegPostProcessor):
+ def run(self, info):
+ if info.get('container') != 'm4a_dash':
+ return [], info
+
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+
+ options = ['-c', 'copy', '-f', 'mp4']
+ self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename)
+ self.run_ffmpeg(filename, temp_filename, options)
+
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+ return [], info
+
+
+class FFmpegFixupM3u8PP(FFmpegPostProcessor):
+ def run(self, info):
+ filename = info['filepath']
+ if self.get_audio_codec(filename) == 'aac':
+ temp_filename = prepend_extension(filename, 'temp')
+
+ options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
+ self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename)
+ self.run_ffmpeg(filename, temp_filename, options)
+
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ return [], info
+
+
+class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, format=None):
+ super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
+ self.format = format
+
+ def run(self, info):
+ subs = info.get('requested_subtitles')
+ filename = info['filepath']
+ new_ext = self.format
+ new_format = new_ext
+ if new_format == 'vtt':
+ new_format = 'webvtt'
+ if subs is None:
+ self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
+ return [], info
+ self._downloader.to_screen('[ffmpeg] Converting subtitles')
+ sub_filenames = []
+ for lang, sub in subs.items():
+ ext = sub['ext']
+ if ext == new_ext:
+ self._downloader.to_screen(
+ '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
+ continue
+ old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
+ sub_filenames.append(old_file)
+ new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
+
+ if ext in ('dfxp', 'ttml', 'tt'):
+ self._downloader.report_warning(
+ 'You have requested to convert dfxp (TTML) subtitles into another format, '
+ 'which results in style information loss')
+
+ dfxp_file = old_file
+ srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext'))
+
+ with open(dfxp_file, 'rb') as f:
+ srt_data = dfxp2srt(f.read())
+
+ with io.open(srt_file, 'wt', encoding='utf-8') as f:
+ f.write(srt_data)
+ old_file = srt_file
+
+ subs[lang] = {
+ 'ext': 'srt',
+ 'data': srt_data
+ }
+
+ if new_ext == 'srt':
+ continue
+ else:
+ sub_filenames.append(srt_file)
+
+ self.run_ffmpeg(old_file, new_file, ['-f', new_format])
+
+ with io.open(new_file, 'rt', encoding='utf-8') as f:
+ subs[lang] = {
+ 'ext': new_ext,
+ 'data': f.read(),
+ }
+
+ return sub_filenames, info
diff --git a/hypervideo_dl/postprocessor/metadatafromtitle.py b/hypervideo_dl/postprocessor/metadatafromtitle.py
new file mode 100644
index 0000000..f5c14d9
--- /dev/null
+++ b/hypervideo_dl/postprocessor/metadatafromtitle.py
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import PostProcessor
+
+
+class MetadataFromTitlePP(PostProcessor):
+ def __init__(self, downloader, titleformat):
+ super(MetadataFromTitlePP, self).__init__(downloader)
+ self._titleformat = titleformat
+ self._titleregex = (self.format_to_regex(titleformat)
+ if re.search(r'%\(\w+\)s', titleformat)
+ else titleformat)
+
+ def format_to_regex(self, fmt):
+ r"""
+ Converts a string like
+ '%(title)s - %(artist)s'
+ to a regex like
+ '(?P<title>.+)\ \-\ (?P<artist>.+)'
+ """
+ lastpos = 0
+ regex = ''
+ # replace %(..)s with regex group and escape other string parts
+ for match in re.finditer(r'%\((\w+)\)s', fmt):
+ regex += re.escape(fmt[lastpos:match.start()])
+ regex += r'(?P<' + match.group(1) + '>.+)'
+ lastpos = match.end()
+ if lastpos < len(fmt):
+ regex += re.escape(fmt[lastpos:])
+ return regex
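+ # For instance, '%(artist)s - %(title)s' compiles to
+ # '(?P<artist>.+)\ \-\ (?P<title>.+)', so a title 'Foo - Bar' yields
+ # artist='Foo' and title='Bar' (names here are purely illustrative).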
+
+ def run(self, info):
+ title = info['title']
+ match = re.match(self._titleregex, title)
+ if match is None:
+ self._downloader.to_screen(
+ '[fromtitle] Could not interpret the video title as "%s"'
+ % self._titleformat)
+ return [], info
+ for attribute, value in match.groupdict().items():
+ info[attribute] = value
+ self._downloader.to_screen(
+ '[fromtitle] parsed %s: %s'
+ % (attribute, value if value is not None else 'NA'))
+
+ return [], info
diff --git a/hypervideo_dl/postprocessor/xattrpp.py b/hypervideo_dl/postprocessor/xattrpp.py
new file mode 100644
index 0000000..814dabe
--- /dev/null
+++ b/hypervideo_dl/postprocessor/xattrpp.py
@@ -0,0 +1,79 @@
+from __future__ import unicode_literals
+
+from .common import PostProcessor
+from ..compat import compat_os_name
+from ..utils import (
+ hyphenate_date,
+ write_xattr,
+ XAttrMetadataError,
+ XAttrUnavailableError,
+)
+
+
+class XAttrMetadataPP(PostProcessor):
+
+ #
+ # More info about extended attributes for media:
+ # http://freedesktop.org/wiki/CommonExtendedAttributes/
+ # http://www.freedesktop.org/wiki/PhreedomDraft/
+ # http://dublincore.org/documents/usageguide/elements.shtml
+ #
+ # TODO:
+ # * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
+ # * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
+ #
+
+ def run(self, info):
+ """ Set extended attributes on downloaded file (if xattr support is found). """
+
+ # Write the metadata to the file's xattrs
+ self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs')
+
+ filename = info['filepath']
+
+ try:
+ xattr_mapping = {
+ 'user.xdg.referrer.url': 'webpage_url',
+ # 'user.xdg.comment': 'description',
+ 'user.dublincore.title': 'title',
+ 'user.dublincore.date': 'upload_date',
+ 'user.dublincore.description': 'description',
+ 'user.dublincore.contributor': 'uploader',
+ 'user.dublincore.format': 'format',
+ }
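+ # On Linux the written attributes can later be inspected with, e.g.,
+ # 'getfattr -d <file>' (getfattr ships with the attr package and is
+ # not part of this project).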
+
+ num_written = 0
+ for xattrname, infoname in xattr_mapping.items():
+
+ value = info.get(infoname)
+
+ if value:
+ if infoname == 'upload_date':
+ value = hyphenate_date(value)
+
+ byte_value = value.encode('utf-8')
+ write_xattr(filename, xattrname, byte_value)
+ num_written += 1
+
+ return [], info
+
+ except XAttrUnavailableError as e:
+ self._downloader.report_error(str(e))
+ return [], info
+
+ except XAttrMetadataError as e:
+ if e.reason == 'NO_SPACE':
+ self._downloader.report_warning(
+ 'There\'s no disk space left, the disk quota was exceeded, or the filesystem xattr limit was exceeded. '
+ + (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize())
+ elif e.reason == 'VALUE_TOO_LONG':
+ self._downloader.report_warning(
+ 'Unable to write extended attributes because the values are too long.')
+ else:
+ msg = 'This filesystem doesn\'t support extended attributes. '
+ if compat_os_name == 'nt':
+ msg += 'You need to use NTFS.'
+ else:
+ msg += '(You may have to enable them in your /etc/fstab)'
+ self._downloader.report_error(msg)
+ return [], info
diff --git a/hypervideo_dl/socks.py b/hypervideo_dl/socks.py
new file mode 100644
index 0000000..5d4adbe
--- /dev/null
+++ b/hypervideo_dl/socks.py
@@ -0,0 +1,273 @@
+# Public Domain SOCKS proxy protocol implementation
+# Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3
+
+from __future__ import unicode_literals
+
+# References:
+# SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol
+# SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol
+# SOCKS5 protocol https://tools.ietf.org/html/rfc1928
+# SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929
+
+import collections
+import socket
+
+from .compat import (
+ compat_ord,
+ compat_struct_pack,
+ compat_struct_unpack,
+)
+
+__author__ = 'Timo Schmid <coding@timoschmid.de>'
+
+SOCKS4_VERSION = 4
+SOCKS4_REPLY_VERSION = 0x00
+# Excerpt from SOCKS4A protocol:
+# if the client cannot resolve the destination host's domain name to find its
+# IP address, it should set the first three bytes of DSTIP to NULL and the last
+# byte to a non-zero value.
+SOCKS4_DEFAULT_DSTIP = compat_struct_pack('!BBBB', 0, 0, 0, 0xFF)
+
+SOCKS5_VERSION = 5
+SOCKS5_USER_AUTH_VERSION = 0x01
+SOCKS5_USER_AUTH_SUCCESS = 0x00
+
+
+class Socks4Command(object):
+ CMD_CONNECT = 0x01
+ CMD_BIND = 0x02
+
+
+class Socks5Command(Socks4Command):
+ CMD_UDP_ASSOCIATE = 0x03
+
+
+class Socks5Auth(object):
+ AUTH_NONE = 0x00
+ AUTH_GSSAPI = 0x01
+ AUTH_USER_PASS = 0x02
+ AUTH_NO_ACCEPTABLE = 0xFF # For server response
+
+
+class Socks5AddressType(object):
+ ATYP_IPV4 = 0x01
+ ATYP_DOMAINNAME = 0x03
+ ATYP_IPV6 = 0x04
+
+
+class ProxyError(socket.error):
+ ERR_SUCCESS = 0x00
+
+ def __init__(self, code=None, msg=None):
+ if code is not None and msg is None:
+ msg = self.CODES.get(code) or 'unknown error'
+ super(ProxyError, self).__init__(code, msg)
+
+
+class InvalidVersionError(ProxyError):
+ def __init__(self, expected_version, got_version):
+ msg = ('Invalid response version from server. Expected {0:02x}, got '
+ '{1:02x}'.format(expected_version, got_version))
+ super(InvalidVersionError, self).__init__(0, msg)
+
+
+class Socks4Error(ProxyError):
+ ERR_SUCCESS = 90
+
+ CODES = {
+ 91: 'request rejected or failed',
+ 92: 'request rejected because SOCKS server cannot connect to identd on the client',
+ 93: 'request rejected because the client program and identd report different user-ids'
+ }
+
+
+class Socks5Error(ProxyError):
+ ERR_GENERAL_FAILURE = 0x01
+
+ CODES = {
+ 0x01: 'general SOCKS server failure',
+ 0x02: 'connection not allowed by ruleset',
+ 0x03: 'Network unreachable',
+ 0x04: 'Host unreachable',
+ 0x05: 'Connection refused',
+ 0x06: 'TTL expired',
+ 0x07: 'Command not supported',
+ 0x08: 'Address type not supported',
+ 0xFE: 'unknown username or invalid password',
+ 0xFF: 'all offered authentication methods were rejected'
+ }
+
+
+class ProxyType(object):
+ SOCKS4 = 0
+ SOCKS4A = 1
+ SOCKS5 = 2
+
+
+Proxy = collections.namedtuple('Proxy', (
+ 'type', 'host', 'port', 'username', 'password', 'remote_dns'))
+
+
+class sockssocket(socket.socket):
+ def __init__(self, *args, **kwargs):
+ self._proxy = None
+ super(sockssocket, self).__init__(*args, **kwargs)
+
+ def setproxy(self, proxytype, addr, port, rdns=True, username=None, password=None):
+ assert proxytype in (ProxyType.SOCKS4, ProxyType.SOCKS4A, ProxyType.SOCKS5)
+
+ self._proxy = Proxy(proxytype, addr, port, username, password, rdns)
+
+ def recvall(self, cnt):
+ data = b''
+ while len(data) < cnt:
+ cur = self.recv(cnt - len(data))
+ if not cur:
+ raise EOFError('{0} bytes missing'.format(cnt - len(data)))
+ data += cur
+ return data
+
+ def _recv_bytes(self, cnt):
+ data = self.recvall(cnt)
+ return compat_struct_unpack('!{0}B'.format(cnt), data)
+
+ @staticmethod
+ def _len_and_data(data):
+ return compat_struct_pack('!B', len(data)) + data
+
+ def _check_response_version(self, expected_version, got_version):
+ if got_version != expected_version:
+ self.close()
+ raise InvalidVersionError(expected_version, got_version)
+
+ def _resolve_address(self, destaddr, default, use_remote_dns):
+ try:
+ return socket.inet_aton(destaddr)
+ except socket.error:
+ if use_remote_dns and self._proxy.remote_dns:
+ return default
+ else:
+ return socket.inet_aton(socket.gethostbyname(destaddr))
+
+ def _setup_socks4(self, address, is_4a=False):
+ destaddr, port = address
+
+ ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a)
+
+ packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr
+
+ username = (self._proxy.username or '').encode('utf-8')
+ packet += username + b'\x00'
+
+ if is_4a and self._proxy.remote_dns:
+ packet += destaddr.encode('utf-8') + b'\x00'
+
+ self.sendall(packet)
+
+ version, resp_code, dstport, dsthost = compat_struct_unpack('!BBHI', self.recvall(8))
+
+ self._check_response_version(SOCKS4_REPLY_VERSION, version)
+
+ if resp_code != Socks4Error.ERR_SUCCESS:
+ self.close()
+ raise Socks4Error(resp_code)
+
+ return (dsthost, dstport)
+
+ def _setup_socks4a(self, address):
+ return self._setup_socks4(address, is_4a=True)
+
+ def _socks5_auth(self):
+ packet = compat_struct_pack('!B', SOCKS5_VERSION)
+
+ auth_methods = [Socks5Auth.AUTH_NONE]
+ if self._proxy.username and self._proxy.password:
+ auth_methods.append(Socks5Auth.AUTH_USER_PASS)
+
+ packet += compat_struct_pack('!B', len(auth_methods))
+ packet += compat_struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods)
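+ # e.g. with no credentials configured the greeting sent here is
+ # b'\x05\x01\x00': protocol version 5, one method offered, AUTH_NONE.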
+
+ self.sendall(packet)
+
+ version, method = self._recv_bytes(2)
+
+ self._check_response_version(SOCKS5_VERSION, version)
+
+ if method == Socks5Auth.AUTH_NO_ACCEPTABLE or (
+ method == Socks5Auth.AUTH_USER_PASS and (not self._proxy.username or not self._proxy.password)):
+ self.close()
+ raise Socks5Error(Socks5Auth.AUTH_NO_ACCEPTABLE)
+
+ if method == Socks5Auth.AUTH_USER_PASS:
+ username = self._proxy.username.encode('utf-8')
+ password = self._proxy.password.encode('utf-8')
+ packet = compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION)
+ packet += self._len_and_data(username) + self._len_and_data(password)
+ self.sendall(packet)
+
+ version, status = self._recv_bytes(2)
+
+ self._check_response_version(SOCKS5_USER_AUTH_VERSION, version)
+
+ if status != SOCKS5_USER_AUTH_SUCCESS:
+ self.close()
+ raise Socks5Error(Socks5Error.ERR_GENERAL_FAILURE)
+
+ def _setup_socks5(self, address):
+ destaddr, port = address
+
+ ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True)
+
+ self._socks5_auth()
+
+ reserved = 0
+ packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved)
+ if ipaddr is None:
+ destaddr = destaddr.encode('utf-8')
+ packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
+ packet += self._len_and_data(destaddr)
+ else:
+ packet += compat_struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr
+ packet += compat_struct_pack('!H', port)
+
+ self.sendall(packet)
+
+ version, status, reserved, atype = self._recv_bytes(4)
+
+ self._check_response_version(SOCKS5_VERSION, version)
+
+ if status != Socks5Error.ERR_SUCCESS:
+ self.close()
+ raise Socks5Error(status)
+
+ if atype == Socks5AddressType.ATYP_IPV4:
+ destaddr = self.recvall(4)
+ elif atype == Socks5AddressType.ATYP_DOMAINNAME:
+ alen = compat_ord(self.recv(1))
+ destaddr = self.recvall(alen)
+ elif atype == Socks5AddressType.ATYP_IPV6:
+ destaddr = self.recvall(16)
+ destport = compat_struct_unpack('!H', self.recvall(2))[0]
+
+ return (destaddr, destport)
+
+ def _make_proxy(self, connect_func, address):
+ if not self._proxy:
+ return connect_func(self, address)
+
+ result = connect_func(self, (self._proxy.host, self._proxy.port))
+ if result != 0 and result is not None:
+ return result
+ setup_funcs = {
+ ProxyType.SOCKS4: self._setup_socks4,
+ ProxyType.SOCKS4A: self._setup_socks4a,
+ ProxyType.SOCKS5: self._setup_socks5,
+ }
+ setup_funcs[self._proxy.type](address)
+ return result
+
+ def connect(self, address):
+ self._make_proxy(socket.socket.connect, address)
+
+ def connect_ex(self, address):
+ return self._make_proxy(socket.socket.connect_ex, address)
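+
+
+# Usage sketch (illustrative; the proxy address below is a placeholder):
+#
+#   s = sockssocket()
+#   s.setproxy(ProxyType.SOCKS5, '127.0.0.1', 1080)
+#   s.connect(('example.com', 80))  # handshake runs transparently here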
diff --git a/hypervideo_dl/utils.py b/hypervideo_dl/utils.py
new file mode 100644
index 0000000..fc62f09
--- /dev/null
+++ b/hypervideo_dl/utils.py
@@ -0,0 +1,5774 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import base64
+import binascii
+import calendar
+import codecs
+import collections
+import contextlib
+import ctypes
+import datetime
+import email.utils
+import email.header
+import errno
+import functools
+import gzip
+import io
+import itertools
+import json
+import locale
+import math
+import operator
+import os
+import platform
+import random
+import re
+import socket
+import ssl
+import subprocess
+import sys
+import tempfile
+import time
+import traceback
+import xml.etree.ElementTree
+import zlib
+
+from .compat import (
+ compat_HTMLParseError,
+ compat_HTMLParser,
+ compat_HTTPError,
+ compat_basestring,
+ compat_chr,
+ compat_cookiejar,
+ compat_ctypes_WINFUNCTYPE,
+ compat_etree_fromstring,
+ compat_expanduser,
+ compat_html_entities,
+ compat_html_entities_html5,
+ compat_http_client,
+ compat_integer_types,
+ compat_kwargs,
+ compat_os_name,
+ compat_parse_qs,
+ compat_shlex_quote,
+ compat_str,
+ compat_struct_pack,
+ compat_struct_unpack,
+ compat_urllib_error,
+ compat_urllib_parse,
+ compat_urllib_parse_urlencode,
+ compat_urllib_parse_urlparse,
+ compat_urllib_parse_unquote_plus,
+ compat_urllib_request,
+ compat_urlparse,
+ compat_xpath,
+)
+
+from .socks import (
+ ProxyType,
+ sockssocket,
+)
+
+
+def register_socks_protocols():
+ # "Register" SOCKS protocols
+ # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
+ # URLs with protocols not in urlparse.uses_netloc are not handled correctly
+ for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
+ if scheme not in compat_urlparse.uses_netloc:
+ compat_urlparse.uses_netloc.append(scheme)
+
+
+# The type of a compiled regular expression; it is not exposed under a stable name otherwise
+compiled_regex_type = type(re.compile(''))
+
+
+def random_user_agent():
+ _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
+ _CHROME_VERSIONS = (
+ '74.0.3729.129',
+ '76.0.3780.3',
+ '76.0.3780.2',
+ '74.0.3729.128',
+ '76.0.3780.1',
+ '76.0.3780.0',
+ '75.0.3770.15',
+ '74.0.3729.127',
+ '74.0.3729.126',
+ '76.0.3779.1',
+ '76.0.3779.0',
+ '75.0.3770.14',
+ '74.0.3729.125',
+ '76.0.3778.1',
+ '76.0.3778.0',
+ '75.0.3770.13',
+ '74.0.3729.124',
+ '74.0.3729.123',
+ '73.0.3683.121',
+ '76.0.3777.1',
+ '76.0.3777.0',
+ '75.0.3770.12',
+ '74.0.3729.122',
+ '76.0.3776.4',
+ '75.0.3770.11',
+ '74.0.3729.121',
+ '76.0.3776.3',
+ '76.0.3776.2',
+ '73.0.3683.120',
+ '74.0.3729.120',
+ '74.0.3729.119',
+ '74.0.3729.118',
+ '76.0.3776.1',
+ '76.0.3776.0',
+ '76.0.3775.5',
+ '75.0.3770.10',
+ '74.0.3729.117',
+ '76.0.3775.4',
+ '76.0.3775.3',
+ '74.0.3729.116',
+ '75.0.3770.9',
+ '76.0.3775.2',
+ '76.0.3775.1',
+ '76.0.3775.0',
+ '75.0.3770.8',
+ '74.0.3729.115',
+ '74.0.3729.114',
+ '76.0.3774.1',
+ '76.0.3774.0',
+ '75.0.3770.7',
+ '74.0.3729.113',
+ '74.0.3729.112',
+ '74.0.3729.111',
+ '76.0.3773.1',
+ '76.0.3773.0',
+ '75.0.3770.6',
+ '74.0.3729.110',
+ '74.0.3729.109',
+ '76.0.3772.1',
+ '76.0.3772.0',
+ '75.0.3770.5',
+ '74.0.3729.108',
+ '74.0.3729.107',
+ '76.0.3771.1',
+ '76.0.3771.0',
+ '75.0.3770.4',
+ '74.0.3729.106',
+ '74.0.3729.105',
+ '75.0.3770.3',
+ '74.0.3729.104',
+ '74.0.3729.103',
+ '74.0.3729.102',
+ '75.0.3770.2',
+ '74.0.3729.101',
+ '75.0.3770.1',
+ '75.0.3770.0',
+ '74.0.3729.100',
+ '75.0.3769.5',
+ '75.0.3769.4',
+ '74.0.3729.99',
+ '75.0.3769.3',
+ '75.0.3769.2',
+ '75.0.3768.6',
+ '74.0.3729.98',
+ '75.0.3769.1',
+ '75.0.3769.0',
+ '74.0.3729.97',
+ '73.0.3683.119',
+ '73.0.3683.118',
+ '74.0.3729.96',
+ '75.0.3768.5',
+ '75.0.3768.4',
+ '75.0.3768.3',
+ '75.0.3768.2',
+ '74.0.3729.95',
+ '74.0.3729.94',
+ '75.0.3768.1',
+ '75.0.3768.0',
+ '74.0.3729.93',
+ '74.0.3729.92',
+ '73.0.3683.117',
+ '74.0.3729.91',
+ '75.0.3766.3',
+ '74.0.3729.90',
+ '75.0.3767.2',
+ '75.0.3767.1',
+ '75.0.3767.0',
+ '74.0.3729.89',
+ '73.0.3683.116',
+ '75.0.3766.2',
+ '74.0.3729.88',
+ '75.0.3766.1',
+ '75.0.3766.0',
+ '74.0.3729.87',
+ '73.0.3683.115',
+ '74.0.3729.86',
+ '75.0.3765.1',
+ '75.0.3765.0',
+ '74.0.3729.85',
+ '73.0.3683.114',
+ '74.0.3729.84',
+ '75.0.3764.1',
+ '75.0.3764.0',
+ '74.0.3729.83',
+ '73.0.3683.113',
+ '75.0.3763.2',
+ '75.0.3761.4',
+ '74.0.3729.82',
+ '75.0.3763.1',
+ '75.0.3763.0',
+ '74.0.3729.81',
+ '73.0.3683.112',
+ '75.0.3762.1',
+ '75.0.3762.0',
+ '74.0.3729.80',
+ '75.0.3761.3',
+ '74.0.3729.79',
+ '73.0.3683.111',
+ '75.0.3761.2',
+ '74.0.3729.78',
+ '74.0.3729.77',
+ '75.0.3761.1',
+ '75.0.3761.0',
+ '73.0.3683.110',
+ '74.0.3729.76',
+ '74.0.3729.75',
+ '75.0.3760.0',
+ '74.0.3729.74',
+ '75.0.3759.8',
+ '75.0.3759.7',
+ '75.0.3759.6',
+ '74.0.3729.73',
+ '75.0.3759.5',
+ '74.0.3729.72',
+ '73.0.3683.109',
+ '75.0.3759.4',
+ '75.0.3759.3',
+ '74.0.3729.71',
+ '75.0.3759.2',
+ '74.0.3729.70',
+ '73.0.3683.108',
+ '74.0.3729.69',
+ '75.0.3759.1',
+ '75.0.3759.0',
+ '74.0.3729.68',
+ '73.0.3683.107',
+ '74.0.3729.67',
+ '75.0.3758.1',
+ '75.0.3758.0',
+ '74.0.3729.66',
+ '73.0.3683.106',
+ '74.0.3729.65',
+ '75.0.3757.1',
+ '75.0.3757.0',
+ '74.0.3729.64',
+ '73.0.3683.105',
+ '74.0.3729.63',
+ '75.0.3756.1',
+ '75.0.3756.0',
+ '74.0.3729.62',
+ '73.0.3683.104',
+ '75.0.3755.3',
+ '75.0.3755.2',
+ '73.0.3683.103',
+ '75.0.3755.1',
+ '75.0.3755.0',
+ '74.0.3729.61',
+ '73.0.3683.102',
+ '74.0.3729.60',
+ '75.0.3754.2',
+ '74.0.3729.59',
+ '75.0.3753.4',
+ '74.0.3729.58',
+ '75.0.3754.1',
+ '75.0.3754.0',
+ '74.0.3729.57',
+ '73.0.3683.101',
+ '75.0.3753.3',
+ '75.0.3752.2',
+ '75.0.3753.2',
+ '74.0.3729.56',
+ '75.0.3753.1',
+ '75.0.3753.0',
+ '74.0.3729.55',
+ '73.0.3683.100',
+ '74.0.3729.54',
+ '75.0.3752.1',
+ '75.0.3752.0',
+ '74.0.3729.53',
+ '73.0.3683.99',
+ '74.0.3729.52',
+ '75.0.3751.1',
+ '75.0.3751.0',
+ '74.0.3729.51',
+ '73.0.3683.98',
+ '74.0.3729.50',
+ '75.0.3750.0',
+ '74.0.3729.49',
+ '74.0.3729.48',
+ '74.0.3729.47',
+ '75.0.3749.3',
+ '74.0.3729.46',
+ '73.0.3683.97',
+ '75.0.3749.2',
+ '74.0.3729.45',
+ '75.0.3749.1',
+ '75.0.3749.0',
+ '74.0.3729.44',
+ '73.0.3683.96',
+ '74.0.3729.43',
+ '74.0.3729.42',
+ '75.0.3748.1',
+ '75.0.3748.0',
+ '74.0.3729.41',
+ '75.0.3747.1',
+ '73.0.3683.95',
+ '75.0.3746.4',
+ '74.0.3729.40',
+ '74.0.3729.39',
+ '75.0.3747.0',
+ '75.0.3746.3',
+ '75.0.3746.2',
+ '74.0.3729.38',
+ '75.0.3746.1',
+ '75.0.3746.0',
+ '74.0.3729.37',
+ '73.0.3683.94',
+ '75.0.3745.5',
+ '75.0.3745.4',
+ '75.0.3745.3',
+ '75.0.3745.2',
+ '74.0.3729.36',
+ '75.0.3745.1',
+ '75.0.3745.0',
+ '75.0.3744.2',
+ '74.0.3729.35',
+ '73.0.3683.93',
+ '74.0.3729.34',
+ '75.0.3744.1',
+ '75.0.3744.0',
+ '74.0.3729.33',
+ '73.0.3683.92',
+ '74.0.3729.32',
+ '74.0.3729.31',
+ '73.0.3683.91',
+ '75.0.3741.2',
+ '75.0.3740.5',
+ '74.0.3729.30',
+ '75.0.3741.1',
+ '75.0.3741.0',
+ '74.0.3729.29',
+ '75.0.3740.4',
+ '73.0.3683.90',
+ '74.0.3729.28',
+ '75.0.3740.3',
+ '73.0.3683.89',
+ '75.0.3740.2',
+ '74.0.3729.27',
+ '75.0.3740.1',
+ '75.0.3740.0',
+ '74.0.3729.26',
+ '73.0.3683.88',
+ '73.0.3683.87',
+ '74.0.3729.25',
+ '75.0.3739.1',
+ '75.0.3739.0',
+ '73.0.3683.86',
+ '74.0.3729.24',
+ '73.0.3683.85',
+ '75.0.3738.4',
+ '75.0.3738.3',
+ '75.0.3738.2',
+ '75.0.3738.1',
+ '75.0.3738.0',
+ '74.0.3729.23',
+ '73.0.3683.84',
+ '74.0.3729.22',
+ '74.0.3729.21',
+ '75.0.3737.1',
+ '75.0.3737.0',
+ '74.0.3729.20',
+ '73.0.3683.83',
+ '74.0.3729.19',
+ '75.0.3736.1',
+ '75.0.3736.0',
+ '74.0.3729.18',
+ '73.0.3683.82',
+ '74.0.3729.17',
+ '75.0.3735.1',
+ '75.0.3735.0',
+ '74.0.3729.16',
+ '73.0.3683.81',
+ '75.0.3734.1',
+ '75.0.3734.0',
+ '74.0.3729.15',
+ '73.0.3683.80',
+ '74.0.3729.14',
+ '75.0.3733.1',
+ '75.0.3733.0',
+ '75.0.3732.1',
+ '74.0.3729.13',
+ '74.0.3729.12',
+ '73.0.3683.79',
+ '74.0.3729.11',
+ '75.0.3732.0',
+ '74.0.3729.10',
+ '73.0.3683.78',
+ '74.0.3729.9',
+ '74.0.3729.8',
+ '74.0.3729.7',
+ '75.0.3731.3',
+ '75.0.3731.2',
+ '75.0.3731.0',
+ '74.0.3729.6',
+ '73.0.3683.77',
+ '73.0.3683.76',
+ '75.0.3730.5',
+ '75.0.3730.4',
+ '73.0.3683.75',
+ '74.0.3729.5',
+ '73.0.3683.74',
+ '75.0.3730.3',
+ '75.0.3730.2',
+ '74.0.3729.4',
+ '73.0.3683.73',
+ '73.0.3683.72',
+ '75.0.3730.1',
+ '75.0.3730.0',
+ '74.0.3729.3',
+ '73.0.3683.71',
+ '74.0.3729.2',
+ '73.0.3683.70',
+ '74.0.3729.1',
+ '74.0.3729.0',
+ '74.0.3726.4',
+ '73.0.3683.69',
+ '74.0.3726.3',
+ '74.0.3728.0',
+ '74.0.3726.2',
+ '73.0.3683.68',
+ '74.0.3726.1',
+ '74.0.3726.0',
+ '74.0.3725.4',
+ '73.0.3683.67',
+ '73.0.3683.66',
+ '74.0.3725.3',
+ '74.0.3725.2',
+ '74.0.3725.1',
+ '74.0.3724.8',
+ '74.0.3725.0',
+ '73.0.3683.65',
+ '74.0.3724.7',
+ '74.0.3724.6',
+ '74.0.3724.5',
+ '74.0.3724.4',
+ '74.0.3724.3',
+ '74.0.3724.2',
+ '74.0.3724.1',
+ '74.0.3724.0',
+ '73.0.3683.64',
+ '74.0.3723.1',
+ '74.0.3723.0',
+ '73.0.3683.63',
+ '74.0.3722.1',
+ '74.0.3722.0',
+ '73.0.3683.62',
+ '74.0.3718.9',
+ '74.0.3702.3',
+ '74.0.3721.3',
+ '74.0.3721.2',
+ '74.0.3721.1',
+ '74.0.3721.0',
+ '74.0.3720.6',
+ '73.0.3683.61',
+ '72.0.3626.122',
+ '73.0.3683.60',
+ '74.0.3720.5',
+ '72.0.3626.121',
+ '74.0.3718.8',
+ '74.0.3720.4',
+ '74.0.3720.3',
+ '74.0.3718.7',
+ '74.0.3720.2',
+ '74.0.3720.1',
+ '74.0.3720.0',
+ '74.0.3718.6',
+ '74.0.3719.5',
+ '73.0.3683.59',
+ '74.0.3718.5',
+ '74.0.3718.4',
+ '74.0.3719.4',
+ '74.0.3719.3',
+ '74.0.3719.2',
+ '74.0.3719.1',
+ '73.0.3683.58',
+ '74.0.3719.0',
+ '73.0.3683.57',
+ '73.0.3683.56',
+ '74.0.3718.3',
+ '73.0.3683.55',
+ '74.0.3718.2',
+ '74.0.3718.1',
+ '74.0.3718.0',
+ '73.0.3683.54',
+ '74.0.3717.2',
+ '73.0.3683.53',
+ '74.0.3717.1',
+ '74.0.3717.0',
+ '73.0.3683.52',
+ '74.0.3716.1',
+ '74.0.3716.0',
+ '73.0.3683.51',
+ '74.0.3715.1',
+ '74.0.3715.0',
+ '73.0.3683.50',
+ '74.0.3711.2',
+ '74.0.3714.2',
+ '74.0.3713.3',
+ '74.0.3714.1',
+ '74.0.3714.0',
+ '73.0.3683.49',
+ '74.0.3713.1',
+ '74.0.3713.0',
+ '72.0.3626.120',
+ '73.0.3683.48',
+ '74.0.3712.2',
+ '74.0.3712.1',
+ '74.0.3712.0',
+ '73.0.3683.47',
+ '72.0.3626.119',
+ '73.0.3683.46',
+ '74.0.3710.2',
+ '72.0.3626.118',
+ '74.0.3711.1',
+ '74.0.3711.0',
+ '73.0.3683.45',
+ '72.0.3626.117',
+ '74.0.3710.1',
+ '74.0.3710.0',
+ '73.0.3683.44',
+ '72.0.3626.116',
+ '74.0.3709.1',
+ '74.0.3709.0',
+ '74.0.3704.9',
+ '73.0.3683.43',
+ '72.0.3626.115',
+ '74.0.3704.8',
+ '74.0.3704.7',
+ '74.0.3708.0',
+ '74.0.3706.7',
+ '74.0.3704.6',
+ '73.0.3683.42',
+ '72.0.3626.114',
+ '74.0.3706.6',
+ '72.0.3626.113',
+ '74.0.3704.5',
+ '74.0.3706.5',
+ '74.0.3706.4',
+ '74.0.3706.3',
+ '74.0.3706.2',
+ '74.0.3706.1',
+ '74.0.3706.0',
+ '73.0.3683.41',
+ '72.0.3626.112',
+ '74.0.3705.1',
+ '74.0.3705.0',
+ '73.0.3683.40',
+ '72.0.3626.111',
+ '73.0.3683.39',
+ '74.0.3704.4',
+ '73.0.3683.38',
+ '74.0.3704.3',
+ '74.0.3704.2',
+ '74.0.3704.1',
+ '74.0.3704.0',
+ '73.0.3683.37',
+ '72.0.3626.110',
+ '72.0.3626.109',
+ '74.0.3703.3',
+ '74.0.3703.2',
+ '73.0.3683.36',
+ '74.0.3703.1',
+ '74.0.3703.0',
+ '73.0.3683.35',
+ '72.0.3626.108',
+ '74.0.3702.2',
+ '74.0.3699.3',
+ '74.0.3702.1',
+ '74.0.3702.0',
+ '73.0.3683.34',
+ '72.0.3626.107',
+ '73.0.3683.33',
+ '74.0.3701.1',
+ '74.0.3701.0',
+ '73.0.3683.32',
+ '73.0.3683.31',
+ '72.0.3626.105',
+ '74.0.3700.1',
+ '74.0.3700.0',
+ '73.0.3683.29',
+ '72.0.3626.103',
+ '74.0.3699.2',
+ '74.0.3699.1',
+ '74.0.3699.0',
+ '73.0.3683.28',
+ '72.0.3626.102',
+ '73.0.3683.27',
+ '73.0.3683.26',
+ '74.0.3698.0',
+ '74.0.3696.2',
+ '72.0.3626.101',
+ '73.0.3683.25',
+ '74.0.3696.1',
+ '74.0.3696.0',
+ '74.0.3694.8',
+ '72.0.3626.100',
+ '74.0.3694.7',
+ '74.0.3694.6',
+ '74.0.3694.5',
+ '74.0.3694.4',
+ '72.0.3626.99',
+ '72.0.3626.98',
+ '74.0.3694.3',
+ '73.0.3683.24',
+ '72.0.3626.97',
+ '72.0.3626.96',
+ '72.0.3626.95',
+ '73.0.3683.23',
+ '72.0.3626.94',
+ '73.0.3683.22',
+ '73.0.3683.21',
+ '72.0.3626.93',
+ '74.0.3694.2',
+ '72.0.3626.92',
+ '74.0.3694.1',
+ '74.0.3694.0',
+ '74.0.3693.6',
+ '73.0.3683.20',
+ '72.0.3626.91',
+ '74.0.3693.5',
+ '74.0.3693.4',
+ '74.0.3693.3',
+ '74.0.3693.2',
+ '73.0.3683.19',
+ '74.0.3693.1',
+ '74.0.3693.0',
+ '73.0.3683.18',
+ '72.0.3626.90',
+ '74.0.3692.1',
+ '74.0.3692.0',
+ '73.0.3683.17',
+ '72.0.3626.89',
+ '74.0.3687.3',
+ '74.0.3691.1',
+ '74.0.3691.0',
+ '73.0.3683.16',
+ '72.0.3626.88',
+ '72.0.3626.87',
+ '73.0.3683.15',
+ '74.0.3690.1',
+ '74.0.3690.0',
+ '73.0.3683.14',
+ '72.0.3626.86',
+ '73.0.3683.13',
+ '73.0.3683.12',
+ '74.0.3689.1',
+ '74.0.3689.0',
+ '73.0.3683.11',
+ '72.0.3626.85',
+ '73.0.3683.10',
+ '72.0.3626.84',
+ '73.0.3683.9',
+ '74.0.3688.1',
+ '74.0.3688.0',
+ '73.0.3683.8',
+ '72.0.3626.83',
+ '74.0.3687.2',
+ '74.0.3687.1',
+ '74.0.3687.0',
+ '73.0.3683.7',
+ '72.0.3626.82',
+ '74.0.3686.4',
+ '72.0.3626.81',
+ '74.0.3686.3',
+ '74.0.3686.2',
+ '74.0.3686.1',
+ '74.0.3686.0',
+ '73.0.3683.6',
+ '72.0.3626.80',
+ '74.0.3685.1',
+ '74.0.3685.0',
+ '73.0.3683.5',
+ '72.0.3626.79',
+ '74.0.3684.1',
+ '74.0.3684.0',
+ '73.0.3683.4',
+ '72.0.3626.78',
+ '72.0.3626.77',
+ '73.0.3683.3',
+ '73.0.3683.2',
+ '72.0.3626.76',
+ '73.0.3683.1',
+ '73.0.3683.0',
+ '72.0.3626.75',
+ '71.0.3578.141',
+ '73.0.3682.1',
+ '73.0.3682.0',
+ '72.0.3626.74',
+ '71.0.3578.140',
+ '73.0.3681.4',
+ '73.0.3681.3',
+ '73.0.3681.2',
+ '73.0.3681.1',
+ '73.0.3681.0',
+ '72.0.3626.73',
+ '71.0.3578.139',
+ '72.0.3626.72',
+ '72.0.3626.71',
+ '73.0.3680.1',
+ '73.0.3680.0',
+ '72.0.3626.70',
+ '71.0.3578.138',
+ '73.0.3678.2',
+ '73.0.3679.1',
+ '73.0.3679.0',
+ '72.0.3626.69',
+ '71.0.3578.137',
+ '73.0.3678.1',
+ '73.0.3678.0',
+ '71.0.3578.136',
+ '73.0.3677.1',
+ '73.0.3677.0',
+ '72.0.3626.68',
+ '72.0.3626.67',
+ '71.0.3578.135',
+ '73.0.3676.1',
+ '73.0.3676.0',
+ '73.0.3674.2',
+ '72.0.3626.66',
+ '71.0.3578.134',
+ '73.0.3674.1',
+ '73.0.3674.0',
+ '72.0.3626.65',
+ '71.0.3578.133',
+ '73.0.3673.2',
+ '73.0.3673.1',
+ '73.0.3673.0',
+ '72.0.3626.64',
+ '71.0.3578.132',
+ '72.0.3626.63',
+ '72.0.3626.62',
+ '72.0.3626.61',
+ '72.0.3626.60',
+ '73.0.3672.1',
+ '73.0.3672.0',
+ '72.0.3626.59',
+ '71.0.3578.131',
+ '73.0.3671.3',
+ '73.0.3671.2',
+ '73.0.3671.1',
+ '73.0.3671.0',
+ '72.0.3626.58',
+ '71.0.3578.130',
+ '73.0.3670.1',
+ '73.0.3670.0',
+ '72.0.3626.57',
+ '71.0.3578.129',
+ '73.0.3669.1',
+ '73.0.3669.0',
+ '72.0.3626.56',
+ '71.0.3578.128',
+ '73.0.3668.2',
+ '73.0.3668.1',
+ '73.0.3668.0',
+ '72.0.3626.55',
+ '71.0.3578.127',
+ '73.0.3667.2',
+ '73.0.3667.1',
+ '73.0.3667.0',
+ '72.0.3626.54',
+ '71.0.3578.126',
+ '73.0.3666.1',
+ '73.0.3666.0',
+ '72.0.3626.53',
+ '71.0.3578.125',
+ '73.0.3665.4',
+ '73.0.3665.3',
+ '72.0.3626.52',
+ '73.0.3665.2',
+ '73.0.3664.4',
+ '73.0.3665.1',
+ '73.0.3665.0',
+ '72.0.3626.51',
+ '71.0.3578.124',
+ '72.0.3626.50',
+ '73.0.3664.3',
+ '73.0.3664.2',
+ '73.0.3664.1',
+ '73.0.3664.0',
+ '73.0.3663.2',
+ '72.0.3626.49',
+ '71.0.3578.123',
+ '73.0.3663.1',
+ '73.0.3663.0',
+ '72.0.3626.48',
+ '71.0.3578.122',
+ '73.0.3662.1',
+ '73.0.3662.0',
+ '72.0.3626.47',
+ '71.0.3578.121',
+ '73.0.3661.1',
+ '72.0.3626.46',
+ '73.0.3661.0',
+ '72.0.3626.45',
+ '71.0.3578.120',
+ '73.0.3660.2',
+ '73.0.3660.1',
+ '73.0.3660.0',
+ '72.0.3626.44',
+ '71.0.3578.119',
+ '73.0.3659.1',
+ '73.0.3659.0',
+ '72.0.3626.43',
+ '71.0.3578.118',
+ '73.0.3658.1',
+ '73.0.3658.0',
+ '72.0.3626.42',
+ '71.0.3578.117',
+ '73.0.3657.1',
+ '73.0.3657.0',
+ '72.0.3626.41',
+ '71.0.3578.116',
+ '73.0.3656.1',
+ '73.0.3656.0',
+ '72.0.3626.40',
+ '71.0.3578.115',
+ '73.0.3655.1',
+ '73.0.3655.0',
+ '72.0.3626.39',
+ '71.0.3578.114',
+ '73.0.3654.1',
+ '73.0.3654.0',
+ '72.0.3626.38',
+ '71.0.3578.113',
+ '73.0.3653.1',
+ '73.0.3653.0',
+ '72.0.3626.37',
+ '71.0.3578.112',
+ '73.0.3652.1',
+ '73.0.3652.0',
+ '72.0.3626.36',
+ '71.0.3578.111',
+ '73.0.3651.1',
+ '73.0.3651.0',
+ '72.0.3626.35',
+ '71.0.3578.110',
+ '73.0.3650.1',
+ '73.0.3650.0',
+ '72.0.3626.34',
+ '71.0.3578.109',
+ '73.0.3649.1',
+ '73.0.3649.0',
+ '72.0.3626.33',
+ '71.0.3578.108',
+ '73.0.3648.2',
+ '73.0.3648.1',
+ '73.0.3648.0',
+ '72.0.3626.32',
+ '71.0.3578.107',
+ '73.0.3647.2',
+ '73.0.3647.1',
+ '73.0.3647.0',
+ '72.0.3626.31',
+ '71.0.3578.106',
+ '73.0.3635.3',
+ '73.0.3646.2',
+ '73.0.3646.1',
+ '73.0.3646.0',
+ '72.0.3626.30',
+ '71.0.3578.105',
+ '72.0.3626.29',
+ '73.0.3645.2',
+ '73.0.3645.1',
+ '73.0.3645.0',
+ '72.0.3626.28',
+ '71.0.3578.104',
+ '72.0.3626.27',
+ '72.0.3626.26',
+ '72.0.3626.25',
+ '72.0.3626.24',
+ '73.0.3644.0',
+ '73.0.3643.2',
+ '72.0.3626.23',
+ '71.0.3578.103',
+ '73.0.3643.1',
+ '73.0.3643.0',
+ '72.0.3626.22',
+ '71.0.3578.102',
+ '73.0.3642.1',
+ '73.0.3642.0',
+ '72.0.3626.21',
+ '71.0.3578.101',
+ '73.0.3641.1',
+ '73.0.3641.0',
+ '72.0.3626.20',
+ '71.0.3578.100',
+ '72.0.3626.19',
+ '73.0.3640.1',
+ '73.0.3640.0',
+ '72.0.3626.18',
+ '73.0.3639.1',
+ '71.0.3578.99',
+ '73.0.3639.0',
+ '72.0.3626.17',
+ '73.0.3638.2',
+ '72.0.3626.16',
+ '73.0.3638.1',
+ '73.0.3638.0',
+ '72.0.3626.15',
+ '71.0.3578.98',
+ '73.0.3635.2',
+ '71.0.3578.97',
+ '73.0.3637.1',
+ '73.0.3637.0',
+ '72.0.3626.14',
+ '71.0.3578.96',
+ '71.0.3578.95',
+ '72.0.3626.13',
+ '71.0.3578.94',
+ '73.0.3636.2',
+ '71.0.3578.93',
+ '73.0.3636.1',
+ '73.0.3636.0',
+ '72.0.3626.12',
+ '71.0.3578.92',
+ '73.0.3635.1',
+ '73.0.3635.0',
+ '72.0.3626.11',
+ '71.0.3578.91',
+ '73.0.3634.2',
+ '73.0.3634.1',
+ '73.0.3634.0',
+ '72.0.3626.10',
+ '71.0.3578.90',
+ '71.0.3578.89',
+ '73.0.3633.2',
+ '73.0.3633.1',
+ '73.0.3633.0',
+ '72.0.3610.4',
+ '72.0.3626.9',
+ '71.0.3578.88',
+ '73.0.3632.5',
+ '73.0.3632.4',
+ '73.0.3632.3',
+ '73.0.3632.2',
+ '73.0.3632.1',
+ '73.0.3632.0',
+ '72.0.3626.8',
+ '71.0.3578.87',
+ '73.0.3631.2',
+ '73.0.3631.1',
+ '73.0.3631.0',
+ '72.0.3626.7',
+ '71.0.3578.86',
+ '72.0.3626.6',
+ '73.0.3630.1',
+ '73.0.3630.0',
+ '72.0.3626.5',
+ '71.0.3578.85',
+ '72.0.3626.4',
+ '73.0.3628.3',
+ '73.0.3628.2',
+ '73.0.3629.1',
+ '73.0.3629.0',
+ '72.0.3626.3',
+ '71.0.3578.84',
+ '73.0.3628.1',
+ '73.0.3628.0',
+ '71.0.3578.83',
+ '73.0.3627.1',
+ '73.0.3627.0',
+ '72.0.3626.2',
+ '71.0.3578.82',
+ '71.0.3578.81',
+ '71.0.3578.80',
+ '72.0.3626.1',
+ '72.0.3626.0',
+ '71.0.3578.79',
+ '70.0.3538.124',
+ '71.0.3578.78',
+ '72.0.3623.4',
+ '72.0.3625.2',
+ '72.0.3625.1',
+ '72.0.3625.0',
+ '71.0.3578.77',
+ '70.0.3538.123',
+ '72.0.3624.4',
+ '72.0.3624.3',
+ '72.0.3624.2',
+ '71.0.3578.76',
+ '72.0.3624.1',
+ '72.0.3624.0',
+ '72.0.3623.3',
+ '71.0.3578.75',
+ '70.0.3538.122',
+ '71.0.3578.74',
+ '72.0.3623.2',
+ '72.0.3610.3',
+ '72.0.3623.1',
+ '72.0.3623.0',
+ '72.0.3622.3',
+ '72.0.3622.2',
+ '71.0.3578.73',
+ '70.0.3538.121',
+ '72.0.3622.1',
+ '72.0.3622.0',
+ '71.0.3578.72',
+ '70.0.3538.120',
+ '72.0.3621.1',
+ '72.0.3621.0',
+ '71.0.3578.71',
+ '70.0.3538.119',
+ '72.0.3620.1',
+ '72.0.3620.0',
+ '71.0.3578.70',
+ '70.0.3538.118',
+ '71.0.3578.69',
+ '72.0.3619.1',
+ '72.0.3619.0',
+ '71.0.3578.68',
+ '70.0.3538.117',
+ '71.0.3578.67',
+ '72.0.3618.1',
+ '72.0.3618.0',
+ '71.0.3578.66',
+ '70.0.3538.116',
+ '72.0.3617.1',
+ '72.0.3617.0',
+ '71.0.3578.65',
+ '70.0.3538.115',
+ '72.0.3602.3',
+ '71.0.3578.64',
+ '72.0.3616.1',
+ '72.0.3616.0',
+ '71.0.3578.63',
+ '70.0.3538.114',
+ '71.0.3578.62',
+ '72.0.3615.1',
+ '72.0.3615.0',
+ '71.0.3578.61',
+ '70.0.3538.113',
+ '72.0.3614.1',
+ '72.0.3614.0',
+ '71.0.3578.60',
+ '70.0.3538.112',
+ '72.0.3613.1',
+ '72.0.3613.0',
+ '71.0.3578.59',
+ '70.0.3538.111',
+ '72.0.3612.2',
+ '72.0.3612.1',
+ '72.0.3612.0',
+ '70.0.3538.110',
+ '71.0.3578.58',
+ '70.0.3538.109',
+ '72.0.3611.2',
+ '72.0.3611.1',
+ '72.0.3611.0',
+ '71.0.3578.57',
+ '70.0.3538.108',
+ '72.0.3610.2',
+ '71.0.3578.56',
+ '71.0.3578.55',
+ '72.0.3610.1',
+ '72.0.3610.0',
+ '71.0.3578.54',
+ '70.0.3538.107',
+ '71.0.3578.53',
+ '72.0.3609.3',
+ '71.0.3578.52',
+ '72.0.3609.2',
+ '71.0.3578.51',
+ '72.0.3608.5',
+ '72.0.3609.1',
+ '72.0.3609.0',
+ '71.0.3578.50',
+ '70.0.3538.106',
+ '72.0.3608.4',
+ '72.0.3608.3',
+ '72.0.3608.2',
+ '71.0.3578.49',
+ '72.0.3608.1',
+ '72.0.3608.0',
+ '70.0.3538.105',
+ '71.0.3578.48',
+ '72.0.3607.1',
+ '72.0.3607.0',
+ '71.0.3578.47',
+ '70.0.3538.104',
+ '72.0.3606.2',
+ '72.0.3606.1',
+ '72.0.3606.0',
+ '71.0.3578.46',
+ '70.0.3538.103',
+ '70.0.3538.102',
+ '72.0.3605.3',
+ '72.0.3605.2',
+ '72.0.3605.1',
+ '72.0.3605.0',
+ '71.0.3578.45',
+ '70.0.3538.101',
+ '71.0.3578.44',
+ '71.0.3578.43',
+ '70.0.3538.100',
+ '70.0.3538.99',
+ '71.0.3578.42',
+ '72.0.3604.1',
+ '72.0.3604.0',
+ '71.0.3578.41',
+ '70.0.3538.98',
+ '71.0.3578.40',
+ '72.0.3603.2',
+ '72.0.3603.1',
+ '72.0.3603.0',
+ '71.0.3578.39',
+ '70.0.3538.97',
+ '72.0.3602.2',
+ '71.0.3578.38',
+ '71.0.3578.37',
+ '72.0.3602.1',
+ '72.0.3602.0',
+ '71.0.3578.36',
+ '70.0.3538.96',
+ '72.0.3601.1',
+ '72.0.3601.0',
+ '71.0.3578.35',
+ '70.0.3538.95',
+ '72.0.3600.1',
+ '72.0.3600.0',
+ '71.0.3578.34',
+ '70.0.3538.94',
+ '72.0.3599.3',
+ '72.0.3599.2',
+ '72.0.3599.1',
+ '72.0.3599.0',
+ '71.0.3578.33',
+ '70.0.3538.93',
+ '72.0.3598.1',
+ '72.0.3598.0',
+ '71.0.3578.32',
+ '70.0.3538.87',
+ '72.0.3597.1',
+ '72.0.3597.0',
+ '72.0.3596.2',
+ '71.0.3578.31',
+ '70.0.3538.86',
+ '71.0.3578.30',
+ '71.0.3578.29',
+ '72.0.3596.1',
+ '72.0.3596.0',
+ '71.0.3578.28',
+ '70.0.3538.85',
+ '72.0.3595.2',
+ '72.0.3591.3',
+ '72.0.3595.1',
+ '72.0.3595.0',
+ '71.0.3578.27',
+ '70.0.3538.84',
+ '72.0.3594.1',
+ '72.0.3594.0',
+ '71.0.3578.26',
+ '70.0.3538.83',
+ '72.0.3593.2',
+ '72.0.3593.1',
+ '72.0.3593.0',
+ '71.0.3578.25',
+ '70.0.3538.82',
+ '72.0.3589.3',
+ '72.0.3592.2',
+ '72.0.3592.1',
+ '72.0.3592.0',
+ '71.0.3578.24',
+ '72.0.3589.2',
+ '70.0.3538.81',
+ '70.0.3538.80',
+ '72.0.3591.2',
+ '72.0.3591.1',
+ '72.0.3591.0',
+ '71.0.3578.23',
+ '70.0.3538.79',
+ '71.0.3578.22',
+ '72.0.3590.1',
+ '72.0.3590.0',
+ '71.0.3578.21',
+ '70.0.3538.78',
+ '70.0.3538.77',
+ '72.0.3589.1',
+ '72.0.3589.0',
+ '71.0.3578.20',
+ '70.0.3538.76',
+ '71.0.3578.19',
+ '70.0.3538.75',
+ '72.0.3588.1',
+ '72.0.3588.0',
+ '71.0.3578.18',
+ '70.0.3538.74',
+ '72.0.3586.2',
+ '72.0.3587.0',
+ '71.0.3578.17',
+ '70.0.3538.73',
+ '72.0.3586.1',
+ '72.0.3586.0',
+ '71.0.3578.16',
+ '70.0.3538.72',
+ '72.0.3585.1',
+ '72.0.3585.0',
+ '71.0.3578.15',
+ '70.0.3538.71',
+ '71.0.3578.14',
+ '72.0.3584.1',
+ '72.0.3584.0',
+ '71.0.3578.13',
+ '70.0.3538.70',
+ '72.0.3583.2',
+ '71.0.3578.12',
+ '72.0.3583.1',
+ '72.0.3583.0',
+ '71.0.3578.11',
+ '70.0.3538.69',
+ '71.0.3578.10',
+ '72.0.3582.0',
+ '72.0.3581.4',
+ '71.0.3578.9',
+ '70.0.3538.67',
+ '72.0.3581.3',
+ '72.0.3581.2',
+ '72.0.3581.1',
+ '72.0.3581.0',
+ '71.0.3578.8',
+ '70.0.3538.66',
+ '72.0.3580.1',
+ '72.0.3580.0',
+ '71.0.3578.7',
+ '70.0.3538.65',
+ '71.0.3578.6',
+ '72.0.3579.1',
+ '72.0.3579.0',
+ '71.0.3578.5',
+ '70.0.3538.64',
+ '71.0.3578.4',
+ '71.0.3578.3',
+ '71.0.3578.2',
+ '71.0.3578.1',
+ '71.0.3578.0',
+ '70.0.3538.63',
+ '69.0.3497.128',
+ '70.0.3538.62',
+ '70.0.3538.61',
+ '70.0.3538.60',
+ '70.0.3538.59',
+ '71.0.3577.1',
+ '71.0.3577.0',
+ '70.0.3538.58',
+ '69.0.3497.127',
+ '71.0.3576.2',
+ '71.0.3576.1',
+ '71.0.3576.0',
+ '70.0.3538.57',
+ '70.0.3538.56',
+ '71.0.3575.2',
+ '70.0.3538.55',
+ '69.0.3497.126',
+ '70.0.3538.54',
+ '71.0.3575.1',
+ '71.0.3575.0',
+ '71.0.3574.1',
+ '71.0.3574.0',
+ '70.0.3538.53',
+ '69.0.3497.125',
+ '70.0.3538.52',
+ '71.0.3573.1',
+ '71.0.3573.0',
+ '70.0.3538.51',
+ '69.0.3497.124',
+ '71.0.3572.1',
+ '71.0.3572.0',
+ '70.0.3538.50',
+ '69.0.3497.123',
+ '71.0.3571.2',
+ '70.0.3538.49',
+ '69.0.3497.122',
+ '71.0.3571.1',
+ '71.0.3571.0',
+ '70.0.3538.48',
+ '69.0.3497.121',
+ '71.0.3570.1',
+ '71.0.3570.0',
+ '70.0.3538.47',
+ '69.0.3497.120',
+ '71.0.3568.2',
+ '71.0.3569.1',
+ '71.0.3569.0',
+ '70.0.3538.46',
+ '69.0.3497.119',
+ '70.0.3538.45',
+ '71.0.3568.1',
+ '71.0.3568.0',
+ '70.0.3538.44',
+ '69.0.3497.118',
+ '70.0.3538.43',
+ '70.0.3538.42',
+ '71.0.3567.1',
+ '71.0.3567.0',
+ '70.0.3538.41',
+ '69.0.3497.117',
+ '71.0.3566.1',
+ '71.0.3566.0',
+ '70.0.3538.40',
+ '69.0.3497.116',
+ '71.0.3565.1',
+ '71.0.3565.0',
+ '70.0.3538.39',
+ '69.0.3497.115',
+ '71.0.3564.1',
+ '71.0.3564.0',
+ '70.0.3538.38',
+ '69.0.3497.114',
+ '71.0.3563.0',
+ '71.0.3562.2',
+ '70.0.3538.37',
+ '69.0.3497.113',
+ '70.0.3538.36',
+ '70.0.3538.35',
+ '71.0.3562.1',
+ '71.0.3562.0',
+ '70.0.3538.34',
+ '69.0.3497.112',
+ '70.0.3538.33',
+ '71.0.3561.1',
+ '71.0.3561.0',
+ '70.0.3538.32',
+ '69.0.3497.111',
+ '71.0.3559.6',
+ '71.0.3560.1',
+ '71.0.3560.0',
+ '71.0.3559.5',
+ '71.0.3559.4',
+ '70.0.3538.31',
+ '69.0.3497.110',
+ '71.0.3559.3',
+ '70.0.3538.30',
+ '69.0.3497.109',
+ '71.0.3559.2',
+ '71.0.3559.1',
+ '71.0.3559.0',
+ '70.0.3538.29',
+ '69.0.3497.108',
+ '71.0.3558.2',
+ '71.0.3558.1',
+ '71.0.3558.0',
+ '70.0.3538.28',
+ '69.0.3497.107',
+ '71.0.3557.2',
+ '71.0.3557.1',
+ '71.0.3557.0',
+ '70.0.3538.27',
+ '69.0.3497.106',
+ '71.0.3554.4',
+ '70.0.3538.26',
+ '71.0.3556.1',
+ '71.0.3556.0',
+ '70.0.3538.25',
+ '71.0.3554.3',
+ '69.0.3497.105',
+ '71.0.3554.2',
+ '70.0.3538.24',
+ '69.0.3497.104',
+ '71.0.3555.2',
+ '70.0.3538.23',
+ '71.0.3555.1',
+ '71.0.3555.0',
+ '70.0.3538.22',
+ '69.0.3497.103',
+ '71.0.3554.1',
+ '71.0.3554.0',
+ '70.0.3538.21',
+ '69.0.3497.102',
+ '71.0.3553.3',
+ '70.0.3538.20',
+ '69.0.3497.101',
+ '71.0.3553.2',
+ '69.0.3497.100',
+ '71.0.3553.1',
+ '71.0.3553.0',
+ '70.0.3538.19',
+ '69.0.3497.99',
+ '69.0.3497.98',
+ '69.0.3497.97',
+ '71.0.3552.6',
+ '71.0.3552.5',
+ '71.0.3552.4',
+ '71.0.3552.3',
+ '71.0.3552.2',
+ '71.0.3552.1',
+ '71.0.3552.0',
+ '70.0.3538.18',
+ '69.0.3497.96',
+ '71.0.3551.3',
+ '71.0.3551.2',
+ '71.0.3551.1',
+ '71.0.3551.0',
+ '70.0.3538.17',
+ '69.0.3497.95',
+ '71.0.3550.3',
+ '71.0.3550.2',
+ '71.0.3550.1',
+ '71.0.3550.0',
+ '70.0.3538.16',
+ '69.0.3497.94',
+ '71.0.3549.1',
+ '71.0.3549.0',
+ '70.0.3538.15',
+ '69.0.3497.93',
+ '69.0.3497.92',
+ '71.0.3548.1',
+ '71.0.3548.0',
+ '70.0.3538.14',
+ '69.0.3497.91',
+ '71.0.3547.1',
+ '71.0.3547.0',
+ '70.0.3538.13',
+ '69.0.3497.90',
+ '71.0.3546.2',
+ '69.0.3497.89',
+ '71.0.3546.1',
+ '71.0.3546.0',
+ '70.0.3538.12',
+ '69.0.3497.88',
+ '71.0.3545.4',
+ '71.0.3545.3',
+ '71.0.3545.2',
+ '71.0.3545.1',
+ '71.0.3545.0',
+ '70.0.3538.11',
+ '69.0.3497.87',
+ '71.0.3544.5',
+ '71.0.3544.4',
+ '71.0.3544.3',
+ '71.0.3544.2',
+ '71.0.3544.1',
+ '71.0.3544.0',
+ '69.0.3497.86',
+ '70.0.3538.10',
+ '69.0.3497.85',
+ '70.0.3538.9',
+ '69.0.3497.84',
+ '71.0.3543.4',
+ '70.0.3538.8',
+ '71.0.3543.3',
+ '71.0.3543.2',
+ '71.0.3543.1',
+ '71.0.3543.0',
+ '70.0.3538.7',
+ '69.0.3497.83',
+ '71.0.3542.2',
+ '71.0.3542.1',
+ '71.0.3542.0',
+ '70.0.3538.6',
+ '69.0.3497.82',
+ '69.0.3497.81',
+ '71.0.3541.1',
+ '71.0.3541.0',
+ '70.0.3538.5',
+ '69.0.3497.80',
+ '71.0.3540.1',
+ '71.0.3540.0',
+ '70.0.3538.4',
+ '69.0.3497.79',
+ '70.0.3538.3',
+ '71.0.3539.1',
+ '71.0.3539.0',
+ '69.0.3497.78',
+ '68.0.3440.134',
+ '69.0.3497.77',
+ '70.0.3538.2',
+ '70.0.3538.1',
+ '70.0.3538.0',
+ '69.0.3497.76',
+ '68.0.3440.133',
+ '69.0.3497.75',
+ '70.0.3537.2',
+ '70.0.3537.1',
+ '70.0.3537.0',
+ '69.0.3497.74',
+ '68.0.3440.132',
+ '70.0.3536.0',
+ '70.0.3535.5',
+ '70.0.3535.4',
+ '70.0.3535.3',
+ '69.0.3497.73',
+ '68.0.3440.131',
+ '70.0.3532.8',
+ '70.0.3532.7',
+ '69.0.3497.72',
+ '69.0.3497.71',
+ '70.0.3535.2',
+ '70.0.3535.1',
+ '70.0.3535.0',
+ '69.0.3497.70',
+ '68.0.3440.130',
+ '69.0.3497.69',
+ '68.0.3440.129',
+ '70.0.3534.4',
+ '70.0.3534.3',
+ '70.0.3534.2',
+ '70.0.3534.1',
+ '70.0.3534.0',
+ '69.0.3497.68',
+ '68.0.3440.128',
+ '70.0.3533.2',
+ '70.0.3533.1',
+ '70.0.3533.0',
+ '69.0.3497.67',
+ '68.0.3440.127',
+ '70.0.3532.6',
+ '70.0.3532.5',
+ '70.0.3532.4',
+ '69.0.3497.66',
+ '68.0.3440.126',
+ '70.0.3532.3',
+ '70.0.3532.2',
+ '70.0.3532.1',
+ '69.0.3497.60',
+ '69.0.3497.65',
+ '69.0.3497.64',
+ '70.0.3532.0',
+ '70.0.3531.0',
+ '70.0.3530.4',
+ '70.0.3530.3',
+ '70.0.3530.2',
+ '69.0.3497.58',
+ '68.0.3440.125',
+ '69.0.3497.57',
+ '69.0.3497.56',
+ '69.0.3497.55',
+ '69.0.3497.54',
+ '70.0.3530.1',
+ '70.0.3530.0',
+ '69.0.3497.53',
+ '68.0.3440.124',
+ '69.0.3497.52',
+ '70.0.3529.3',
+ '70.0.3529.2',
+ '70.0.3529.1',
+ '70.0.3529.0',
+ '69.0.3497.51',
+ '70.0.3528.4',
+ '68.0.3440.123',
+ '70.0.3528.3',
+ '70.0.3528.2',
+ '70.0.3528.1',
+ '70.0.3528.0',
+ '69.0.3497.50',
+ '68.0.3440.122',
+ '70.0.3527.1',
+ '70.0.3527.0',
+ '69.0.3497.49',
+ '68.0.3440.121',
+ '70.0.3526.1',
+ '70.0.3526.0',
+ '68.0.3440.120',
+ '69.0.3497.48',
+ '69.0.3497.47',
+ '68.0.3440.119',
+ '68.0.3440.118',
+ '70.0.3525.5',
+ '70.0.3525.4',
+ '70.0.3525.3',
+ '68.0.3440.117',
+ '69.0.3497.46',
+ '70.0.3525.2',
+ '70.0.3525.1',
+ '70.0.3525.0',
+ '69.0.3497.45',
+ '68.0.3440.116',
+ '70.0.3524.4',
+ '70.0.3524.3',
+ '69.0.3497.44',
+ '70.0.3524.2',
+ '70.0.3524.1',
+ '70.0.3524.0',
+ '70.0.3523.2',
+ '69.0.3497.43',
+ '68.0.3440.115',
+ '70.0.3505.9',
+ '69.0.3497.42',
+ '70.0.3505.8',
+ '70.0.3523.1',
+ '70.0.3523.0',
+ '69.0.3497.41',
+ '68.0.3440.114',
+ '70.0.3505.7',
+ '69.0.3497.40',
+ '70.0.3522.1',
+ '70.0.3522.0',
+ '70.0.3521.2',
+ '69.0.3497.39',
+ '68.0.3440.113',
+ '70.0.3505.6',
+ '70.0.3521.1',
+ '70.0.3521.0',
+ '69.0.3497.38',
+ '68.0.3440.112',
+ '70.0.3520.1',
+ '70.0.3520.0',
+ '69.0.3497.37',
+ '68.0.3440.111',
+ '70.0.3519.3',
+ '70.0.3519.2',
+ '70.0.3519.1',
+ '70.0.3519.0',
+ '69.0.3497.36',
+ '68.0.3440.110',
+ '70.0.3518.1',
+ '70.0.3518.0',
+ '69.0.3497.35',
+ '69.0.3497.34',
+ '68.0.3440.109',
+ '70.0.3517.1',
+ '70.0.3517.0',
+ '69.0.3497.33',
+ '68.0.3440.108',
+ '69.0.3497.32',
+ '70.0.3516.3',
+ '70.0.3516.2',
+ '70.0.3516.1',
+ '70.0.3516.0',
+ '69.0.3497.31',
+ '68.0.3440.107',
+ '70.0.3515.4',
+ '68.0.3440.106',
+ '70.0.3515.3',
+ '70.0.3515.2',
+ '70.0.3515.1',
+ '70.0.3515.0',
+ '69.0.3497.30',
+ '68.0.3440.105',
+ '68.0.3440.104',
+ '70.0.3514.2',
+ '70.0.3514.1',
+ '70.0.3514.0',
+ '69.0.3497.29',
+ '68.0.3440.103',
+ '70.0.3513.1',
+ '70.0.3513.0',
+ '69.0.3497.28',
+ )
+ return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
+
+
+std_headers = {
+ 'User-Agent': random_user_agent(),
+ 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+ 'Accept-Encoding': 'gzip, deflate',
+ 'Accept-Language': 'en-us,en;q=0.5',
+}
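+
+# For illustration: 'User-Agent' is filled in once at import time from the
+# template defined above with a randomly chosen Chrome version, producing
+# something like '... Chrome/70.0.3538.39 Safari/537.36' (varies per run).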
+
+
+USER_AGENTS = {
+ 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
+}
+
+
+NO_DEFAULT = object()
+
+ENGLISH_MONTH_NAMES = [
+ 'January', 'February', 'March', 'April', 'May', 'June',
+ 'July', 'August', 'September', 'October', 'November', 'December']
+
+MONTH_NAMES = {
+ 'en': ENGLISH_MONTH_NAMES,
+ 'fr': [
+ 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
+ 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
+}
+
+KNOWN_EXTENSIONS = (
+ 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
+ 'flv', 'f4v', 'f4a', 'f4b',
+ 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
+ 'mkv', 'mka', 'mk3d',
+ 'avi', 'divx',
+ 'mov',
+ 'asf', 'wmv', 'wma',
+ '3gp', '3g2',
+ 'mp3',
+ 'flac',
+ 'ape',
+ 'wav',
+ 'f4f', 'f4m', 'm3u8', 'smil')
+
+# needed for sanitizing filenames in restricted mode
+ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
+ itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
+ 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
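+
+# Sample mappings from the table above, as used by sanitize_filename() in
+# restricted mode:
+#   ACCENT_CHARS['é'] == 'e'
+#   ACCENT_CHARS['ß'] == 'ss'
+#   ACCENT_CHARS['Æ'] == 'AE'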
+
+DATE_FORMATS = (
+ '%d %B %Y',
+ '%d %b %Y',
+ '%B %d %Y',
+ '%B %dst %Y',
+ '%B %dnd %Y',
+ '%B %drd %Y',
+ '%B %dth %Y',
+ '%b %d %Y',
+ '%b %dst %Y',
+ '%b %dnd %Y',
+ '%b %drd %Y',
+ '%b %dth %Y',
+ '%b %dst %Y %I:%M',
+ '%b %dnd %Y %I:%M',
+ '%b %drd %Y %I:%M',
+ '%b %dth %Y %I:%M',
+ '%Y %m %d',
+ '%Y-%m-%d',
+ '%Y/%m/%d',
+ '%Y/%m/%d %H:%M',
+ '%Y/%m/%d %H:%M:%S',
+ '%Y-%m-%d %H:%M',
+ '%Y-%m-%d %H:%M:%S',
+ '%Y-%m-%d %H:%M:%S.%f',
+ '%d.%m.%Y %H:%M',
+ '%d.%m.%Y %H.%M',
+ '%Y-%m-%dT%H:%M:%SZ',
+ '%Y-%m-%dT%H:%M:%S.%fZ',
+ '%Y-%m-%dT%H:%M:%S.%f0Z',
+ '%Y-%m-%dT%H:%M:%S',
+ '%Y-%m-%dT%H:%M:%S.%f',
+ '%Y-%m-%dT%H:%M',
+ '%b %d %Y at %H:%M',
+ '%b %d %Y at %H:%M:%S',
+ '%B %d %Y at %H:%M',
+ '%B %d %Y at %H:%M:%S',
+)
+
+DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
+DATE_FORMATS_DAY_FIRST.extend([
+ '%d-%m-%Y',
+ '%d.%m.%Y',
+ '%d.%m.%y',
+ '%d/%m/%Y',
+ '%d/%m/%y',
+ '%d/%m/%Y %H:%M:%S',
+])
+
+DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
+DATE_FORMATS_MONTH_FIRST.extend([
+ '%m-%d-%Y',
+ '%m.%d.%Y',
+ '%m/%d/%Y',
+ '%m/%d/%y',
+ '%m/%d/%Y %H:%M:%S',
+])
+
+PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
+
+
+def preferredencoding():
+ """Get preferred encoding.
+
+ Returns the best encoding scheme for the system, based on
+ locale.getpreferredencoding() and some further tweaks.
+ """
+ try:
+ pref = locale.getpreferredencoding()
+ 'TEST'.encode(pref)
+ except Exception:
+ pref = 'UTF-8'
+
+ return pref
+
+
+def write_json_file(obj, fn):
+ """ Encode obj as JSON and write it to fn, atomically if possible """
+
+ fn = encodeFilename(fn)
+ if sys.version_info < (3, 0) and sys.platform != 'win32':
+ encoding = get_filesystem_encoding()
+ # os.path.basename returns a bytes object, but NamedTemporaryFile
+ # will fail if the filename contains non-ASCII characters unless we
+ # use a unicode object
+ path_basename = lambda f: os.path.basename(f).decode(encoding)
+ # the same for os.path.dirname
+ path_dirname = lambda f: os.path.dirname(f).decode(encoding)
+ else:
+ path_basename = os.path.basename
+ path_dirname = os.path.dirname
+
+ args = {
+ 'suffix': '.tmp',
+ 'prefix': path_basename(fn) + '.',
+ 'dir': path_dirname(fn),
+ 'delete': False,
+ }
+
+ # In Python 2.x, json.dump expects a bytestream.
+ # In Python 3.x, it writes to a character stream
+ if sys.version_info < (3, 0):
+ args['mode'] = 'wb'
+ else:
+ args.update({
+ 'mode': 'w',
+ 'encoding': 'utf-8',
+ })
+
+ tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
+
+ try:
+ with tf:
+ json.dump(obj, tf)
+ if sys.platform == 'win32':
+ # Need to remove existing file on Windows, else os.rename raises
+ # WindowsError or FileExistsError.
+ try:
+ os.unlink(fn)
+ except OSError:
+ pass
+ try:
+ mask = os.umask(0)
+ os.umask(mask)
+ os.chmod(tf.name, 0o666 & ~mask)
+ except OSError:
+ pass
+ os.rename(tf.name, fn)
+ except Exception:
+ try:
+ os.remove(tf.name)
+ except OSError:
+ pass
+ raise
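+
+# Usage sketch: the object is serialized to a sibling temporary file (e.g.
+# 'info.json.<random>.tmp') and then os.rename()d over the target, so
+# readers never observe a half-written file:
+#   write_json_file({'id': 'abc'}, 'info.json')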
+
+
+if sys.version_info >= (2, 7):
+ def find_xpath_attr(node, xpath, key, val=None):
+ """ Find the xpath xpath[@key=val] """
+ assert re.match(r'^[a-zA-Z_-]+$', key)
+ expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
+ return node.find(expr)
+else:
+ def find_xpath_attr(node, xpath, key, val=None):
+ for f in node.findall(compat_xpath(xpath)):
+ if key not in f.attrib:
+ continue
+ if val is None or f.attrib.get(key) == val:
+ return f
+ return None
+
+# On Python 2.6 the xml.etree.ElementTree.Element methods don't support
+# the namespace parameter
+
+
+def xpath_with_ns(path, ns_map):
+ components = [c.split(':') for c in path.split('/')]
+ replaced = []
+ for c in components:
+ if len(c) == 1:
+ replaced.append(c[0])
+ else:
+ ns, tag = c
+ replaced.append('{%s}%s' % (ns_map[ns], tag))
+ return '/'.join(replaced)
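+
+# Example expansion (prefixes are rewritten to ElementTree's Clark notation):
+#   >>> xpath_with_ns('ns:video/ns:source', {'ns': 'http://example.com/ns'})
+#   '{http://example.com/ns}video/{http://example.com/ns}source'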
+
+
+def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
+ def _find_xpath(xpath):
+ return node.find(compat_xpath(xpath))
+
+ if isinstance(xpath, (str, compat_str)):
+ n = _find_xpath(xpath)
+ else:
+ for xp in xpath:
+ n = _find_xpath(xp)
+ if n is not None:
+ break
+
+ if n is None:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ name = xpath if name is None else name
+ raise ExtractorError('Could not find XML element %s' % name)
+ else:
+ return None
+ return n
+
+
+def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
+ n = xpath_element(node, xpath, name, fatal=fatal, default=default)
+ if n is None or n == default:
+ return n
+ if n.text is None:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ name = xpath if name is None else name
+ raise ExtractorError('Could not find XML element\'s text %s' % name)
+ else:
+ return None
+ return n.text
+
+
+def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
+ n = find_xpath_attr(node, xpath, key)
+ if n is None:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ name = '%s[@%s]' % (xpath, key) if name is None else name
+ raise ExtractorError('Could not find XML attribute %s' % name)
+ else:
+ return None
+ return n.attrib[key]
+
+
+def get_element_by_id(id, html):
+ """Return the content of the tag with the specified ID in the passed HTML document"""
+ return get_element_by_attribute('id', id, html)
+
+
+def get_element_by_class(class_name, html):
+ """Return the content of the first tag with the specified class in the passed HTML document"""
+ retval = get_elements_by_class(class_name, html)
+ return retval[0] if retval else None
+
+
+def get_element_by_attribute(attribute, value, html, escape_value=True):
+ retval = get_elements_by_attribute(attribute, value, html, escape_value)
+ return retval[0] if retval else None
+
+
+def get_elements_by_class(class_name, html):
+ """Return the content of all tags with the specified class in the passed HTML document as a list"""
+ return get_elements_by_attribute(
+ 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
+ html, escape_value=False)
+
+
+def get_elements_by_attribute(attribute, value, html, escape_value=True):
+ """Return the content of the tag with the specified attribute in the passed HTML document"""
+
+ value = re.escape(value) if escape_value else value
+
+ retlist = []
+ for m in re.finditer(r'''(?xs)
+ <([a-zA-Z0-9:._-]+)
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
+ \s+%s=['"]?%s['"]?
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
+ \s*>
+ (?P<content>.*?)
+ </\1>
+ ''' % (re.escape(attribute), value), html):
+ res = m.group('content')
+
+ if res.startswith('"') or res.startswith("'"):
+ res = res[1:-1]
+
+ retlist.append(unescapeHTML(res))
+
+ return retlist
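+
+# Doctest-style sketch for the helpers above that delegate here:
+#   >>> get_element_by_class('title', '<span class="title">Foo</span>')
+#   'Foo'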
+
+
+class HTMLAttributeParser(compat_HTMLParser):
+ """Trivial HTML parser to gather the attributes for a single element"""
+ def __init__(self):
+ self.attrs = {}
+ compat_HTMLParser.__init__(self)
+
+ def handle_starttag(self, tag, attrs):
+ self.attrs = dict(attrs)
+
+
+def extract_attributes(html_element):
+ """Given a string for an HTML element such as
+ <el
+ a="foo" B="bar" c="&98;az" d=boz
+ empty= noval entity="&amp;"
+ sq='"' dq="'"
+ >
+ Decode and return a dictionary of attributes.
+ {
+ 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
+ 'empty': '', 'noval': None, 'entity': '&',
+ 'sq': '"', 'dq': '\''
+ }.
+ NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
+ but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
+ """
+ parser = HTMLAttributeParser()
+ try:
+ parser.feed(html_element)
+ parser.close()
+ # Older Python may throw HTMLParseError in case of malformed HTML
+ except compat_HTMLParseError:
+ pass
+ return parser.attrs
+
+
+def clean_html(html):
+ """Clean an HTML snippet into a readable string"""
+
+ if html is None: # Convenience for sanitizing descriptions etc.
+ return html
+
+ # Newline vs <br />
+ html = html.replace('\n', ' ')
+ html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
+ html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
+ # Strip html tags
+ html = re.sub('<.*?>', '', html)
+ # Replace html entities
+ html = unescapeHTML(html)
+ return html.strip()
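+
+# Illustration: paragraph breaks become newlines and entities are decoded:
+#   >>> clean_html('<p>foo</p><p>bar &amp; baz</p>')
+#   'foo\nbar & baz'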
+
+
+def sanitize_open(filename, open_mode):
+ """Try to open the given filename, and slightly tweak it if this fails.
+
+ Attempts to open the given filename. If this fails, it tries to change
+ the filename slightly, step by step, until it's either able to open it
+ or it fails and raises a final exception, like the standard open()
+ function.
+
+ It returns the tuple (stream, definitive_file_name).
+ """
+ try:
+ if filename == '-':
+ if sys.platform == 'win32':
+ import msvcrt
+ msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+ return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
+ stream = open(encodeFilename(filename), open_mode)
+ return (stream, filename)
+ except (IOError, OSError) as err:
+ if err.errno in (errno.EACCES,):
+ raise
+
+ # In case of error, try to remove win32 forbidden chars
+ alt_filename = sanitize_path(filename)
+ if alt_filename == filename:
+ raise
+ else:
+ # An exception here should be caught in the caller
+ stream = open(encodeFilename(alt_filename), open_mode)
+ return (stream, alt_filename)
+
+
+def timeconvert(timestr):
+ """Convert RFC 2822 defined time string into system timestamp"""
+ timestamp = None
+ timetuple = email.utils.parsedate_tz(timestr)
+ if timetuple is not None:
+ timestamp = email.utils.mktime_tz(timetuple)
+ return timestamp
+
+
+def sanitize_filename(s, restricted=False, is_id=False):
+ """Sanitizes a string so it could be used as part of a filename.
+ If restricted is set, use a stricter subset of allowed characters.
+ Set is_id if this is not an arbitrary string, but an ID that should be kept
+ if possible.
+ """
+ def replace_insane(char):
+ if restricted and char in ACCENT_CHARS:
+ return ACCENT_CHARS[char]
+ if char == '?' or ord(char) < 32 or ord(char) == 127:
+ return ''
+ elif char == '"':
+ return '' if restricted else '\''
+ elif char == ':':
+ return '_-' if restricted else ' -'
+ elif char in '\\/|*<>':
+ return '_'
+ if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
+ return '_'
+ if restricted and ord(char) > 127:
+ return '_'
+ return char
+
+ # Handle timestamps
+ s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
+ result = ''.join(map(replace_insane, s))
+ if not is_id:
+ while '__' in result:
+ result = result.replace('__', '_')
+ result = result.strip('_')
+ # Common case of "Foreign band name - English song title"
+ if restricted and result.startswith('-_'):
+ result = result[2:]
+ if result.startswith('-'):
+ result = '_' + result[len('-'):]
+ result = result.lstrip('.')
+ if not result:
+ result = '_'
+ return result
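+
+# Doctest-style sketch of restricted mode and timestamp handling:
+#   >>> sanitize_filename('A/B: C?', restricted=True)
+#   'A_B_-_C'
+#   >>> sanitize_filename('1:23:45')
+#   '1_23_45'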
+
+
+def sanitize_path(s):
+ """Sanitizes and normalizes path on Windows"""
+ if sys.platform != 'win32':
+ return s
+ drive_or_unc, _ = os.path.splitdrive(s)
+ if sys.version_info < (2, 7) and not drive_or_unc:
+ drive_or_unc, _ = os.path.splitunc(s)
+ norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
+ if drive_or_unc:
+ norm_path.pop(0)
+ sanitized_path = [
+ path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
+ for path_part in norm_path]
+ if drive_or_unc:
+ sanitized_path.insert(0, drive_or_unc + os.path.sep)
+ return os.path.join(*sanitized_path)
+
+
+def sanitize_url(url):
+ # Prepend protocol-less URLs with `http:` scheme in order to mitigate
+ # the number of unwanted failures due to missing protocol
+ if url.startswith('//'):
+ return 'http:%s' % url
+ # Fix some common typos seen so far
+ COMMON_TYPOS = (
+ # https://github.com/ytdl-org/youtube-dl/issues/15649
+ (r'^httpss://', r'https://'),
+ # https://bx1.be/lives/direct-tv/
+ (r'^rmtp([es]?)://', r'rtmp\1://'),
+ )
+ for mistake, fixup in COMMON_TYPOS:
+ if re.match(mistake, url):
+ return re.sub(mistake, fixup, url)
+ return url
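+
+# Examples of the fixups above:
+#   >>> sanitize_url('//example.com/v.mp4')
+#   'http://example.com/v.mp4'
+#   >>> sanitize_url('httpss://example.com/v.mp4')
+#   'https://example.com/v.mp4'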
+
+
+def sanitized_Request(url, *args, **kwargs):
+ return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
+
+
+def expand_path(s):
+ """Expand shell variables and ~"""
+ return os.path.expandvars(compat_expanduser(s))
+
+
+def orderedSet(iterable):
+ """ Remove all duplicates from the input iterable """
+ res = []
+ for el in iterable:
+ if el not in res:
+ res.append(el)
+ return res
+
+
+def _htmlentity_transform(entity_with_semicolon):
+ """Transforms an HTML entity to a character."""
+ entity = entity_with_semicolon[:-1]
+
+ # Known non-numeric HTML entity
+ if entity in compat_html_entities.name2codepoint:
+ return compat_chr(compat_html_entities.name2codepoint[entity])
+
+ # TODO: HTML5 allows entities without a semicolon. For example,
+ # '&Eacuteric' should be decoded as 'Éric'.
+ if entity_with_semicolon in compat_html_entities_html5:
+ return compat_html_entities_html5[entity_with_semicolon]
+
+ mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
+ if mobj is not None:
+ numstr = mobj.group(1)
+ if numstr.startswith('x'):
+ base = 16
+ numstr = '0%s' % numstr
+ else:
+ base = 10
+ # See https://github.com/ytdl-org/youtube-dl/issues/7518
+ try:
+ return compat_chr(int(numstr, base))
+ except ValueError:
+ pass
+
+ # Unknown entity in name, return its literal representation
+ return '&%s;' % entity
+
+
+def unescapeHTML(s):
+ if s is None:
+ return None
+ assert type(s) == compat_str
+
+ return re.sub(
+ r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
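+
+# Named, decimal and hexadecimal entities are all handled:
+#   >>> unescapeHTML('&amp; &#65; &#x41;')
+#   '& A A'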
+
+
+def get_subprocess_encoding():
+ if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+ # For subprocess calls, encode with locale encoding
+ # Refer to http://stackoverflow.com/a/9951851/35070
+ encoding = preferredencoding()
+ else:
+ encoding = sys.getfilesystemencoding()
+ if encoding is None:
+ encoding = 'utf-8'
+ return encoding
+
+
+def encodeFilename(s, for_subprocess=False):
+ """
+ @param s The name of the file
+ """
+
+ assert type(s) == compat_str
+
+ # Python 3 has a Unicode API
+ if sys.version_info >= (3, 0):
+ return s
+
+ # Pass '' directly to use Unicode APIs on Windows 2000 and up
+ # (Detecting Windows NT 4 is tricky because 'major >= 4' would
+ # match Windows 9x series as well. Besides, NT 4 is obsolete.)
+ if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+ return s
+
+ # Jython assumes filenames are Unicode strings even though it reports itself as Python 2.x compatible
+ if sys.platform.startswith('java'):
+ return s
+
+ return s.encode(get_subprocess_encoding(), 'ignore')
+
+
+def decodeFilename(b, for_subprocess=False):
+
+ if sys.version_info >= (3, 0):
+ return b
+
+ if not isinstance(b, bytes):
+ return b
+
+ return b.decode(get_subprocess_encoding(), 'ignore')
+
+
+def encodeArgument(s):
+ if not isinstance(s, compat_str):
+ # Legacy code that uses byte strings
+ # Uncomment the following line after fixing all post processors
+ # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
+ s = s.decode('ascii')
+ return encodeFilename(s, True)
+
+
+def decodeArgument(b):
+ return decodeFilename(b, True)
+
+
+def decodeOption(optval):
+ if optval is None:
+ return optval
+ if isinstance(optval, bytes):
+ optval = optval.decode(preferredencoding())
+
+ assert isinstance(optval, compat_str)
+ return optval
+
+
+def formatSeconds(secs):
+ if secs >= 3600:
+ return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
+ elif secs >= 60:
+ return '%d:%02d' % (secs // 60, secs % 60)
+ else:
+ return '%d' % secs
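+
+# e.g. formatSeconds(3725) == '1:02:05' and formatSeconds(75) == '1:15'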
+
+
+def make_HTTPS_handler(params, **kwargs):
+ opts_no_check_certificate = params.get('nocheckcertificate', False)
+ if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
+ context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
+ if opts_no_check_certificate:
+ context.check_hostname = False
+ context.verify_mode = ssl.CERT_NONE
+ try:
+ return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
+ except TypeError:
+ # Python 2.7.8
+ # (create_default_context present but HTTPSHandler has no context=)
+ pass
+
+ if sys.version_info < (3, 2):
+ return YoutubeDLHTTPSHandler(params, **kwargs)
+ else: # Python < 3.4
+ context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
+ context.verify_mode = (ssl.CERT_NONE
+ if opts_no_check_certificate
+ else ssl.CERT_REQUIRED)
+ context.set_default_verify_paths()
+ return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
+
+
+def bug_reports_message():
+ if ytdl_is_updateable():
+ update_cmd = 'run "doas pacman -Sy hypervideo" to update'
+ else:
+ update_cmd = 'see https://yt-dl.org/update on how to update'
+ msg = '; please report this issue on https://yt-dl.org/bug .'
+ msg += ' Make sure you are using the latest version; %s.' % update_cmd
+ msg += ' Be sure to call hypervideo with the --verbose flag and include its complete output.'
+ return msg
+
+
+class YoutubeDLError(Exception):
+ """Base exception for YoutubeDL errors."""
+ pass
+
+
+class ExtractorError(YoutubeDLError):
+ """Error during info extraction."""
+
+ def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
+ """ tb, if given, is the original traceback (so that it can be printed out).
+ If expected is set, this is a normal error message and most likely not a bug in hypervideo.
+ """
+
+ if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
+ expected = True
+ if video_id is not None:
+ msg = video_id + ': ' + msg
+ if cause:
+ msg += ' (caused by %r)' % cause
+ if not expected:
+ msg += bug_reports_message()
+ super(ExtractorError, self).__init__(msg)
+
+ self.traceback = tb
+ self.exc_info = sys.exc_info() # preserve original exception
+ self.cause = cause
+ self.video_id = video_id
+
+ def format_traceback(self):
+ if self.traceback is None:
+ return None
+ return ''.join(traceback.format_tb(self.traceback))
+
+
+class UnsupportedError(ExtractorError):
+ def __init__(self, url):
+ super(UnsupportedError, self).__init__(
+ 'Unsupported URL: %s' % url, expected=True)
+ self.url = url
+
+
+class RegexNotFoundError(ExtractorError):
+ """Error when a regex didn't match"""
+ pass
+
+
+class GeoRestrictedError(ExtractorError):
+ """Geographic restriction Error exception.
+
+ This exception may be thrown when a video is not available from your
+ geographic location due to geographic restrictions imposed by a website.
+ """
+ def __init__(self, msg, countries=None):
+ super(GeoRestrictedError, self).__init__(msg, expected=True)
+ self.msg = msg
+ self.countries = countries
+
+
+class DownloadError(YoutubeDLError):
+ """Download Error exception.
+
+ This exception may be thrown by FileDownloader objects if they are not
+ configured to continue on errors. They will contain the appropriate
+ error message.
+ """
+
+ def __init__(self, msg, exc_info=None):
+ """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
+ super(DownloadError, self).__init__(msg)
+ self.exc_info = exc_info
+
+
+class SameFileError(YoutubeDLError):
+ """Same File exception.
+
+ This exception will be thrown by FileDownloader objects if they detect
+ multiple files would have to be downloaded to the same file on disk.
+ """
+ pass
+
+
+class PostProcessingError(YoutubeDLError):
+ """Post Processing exception.
+
+ This exception may be raised by PostProcessor's .run() method to
+ indicate an error in the postprocessing task.
+ """
+
+ def __init__(self, msg):
+ super(PostProcessingError, self).__init__(msg)
+ self.msg = msg
+
+
+class MaxDownloadsReached(YoutubeDLError):
+ """ --max-downloads limit has been reached. """
+ pass
+
+
+class UnavailableVideoError(YoutubeDLError):
+ """Unavailable Format exception.
+
+ This exception will be thrown when a video is requested
+ in a format that is not available for that video.
+ """
+ pass
+
+
+class ContentTooShortError(YoutubeDLError):
+ """Content Too Short exception.
+
+ This exception may be raised by FileDownloader objects when a file they
+ download is too small for what the server announced first, indicating
+ the connection was probably interrupted.
+ """
+
+ def __init__(self, downloaded, expected):
+ super(ContentTooShortError, self).__init__(
+ 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
+ )
+ # Both in bytes
+ self.downloaded = downloaded
+ self.expected = expected
+
+
+class XAttrMetadataError(YoutubeDLError):
+ def __init__(self, code=None, msg='Unknown error'):
+ super(XAttrMetadataError, self).__init__(msg)
+ self.code = code
+ self.msg = msg
+
+ # Parsing code and msg
+ if (self.code in (errno.ENOSPC, errno.EDQUOT)
+ or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
+ self.reason = 'NO_SPACE'
+ elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
+ self.reason = 'VALUE_TOO_LONG'
+ else:
+ self.reason = 'NOT_SUPPORTED'
+
+
+class XAttrUnavailableError(YoutubeDLError):
+ pass
+
+
+def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
+ # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
+ # expected HTTP responses to meet HTTP/1.0 or later (see also
+ # https://github.com/ytdl-org/youtube-dl/issues/6727)
+ if sys.version_info < (3, 0):
+ kwargs['strict'] = True
+ hc = http_class(*args, **compat_kwargs(kwargs))
+ source_address = ydl_handler._params.get('source_address')
+
+ if source_address is not None:
+ # This is to workaround _create_connection() from socket where it will try all
+ # address data from getaddrinfo() including IPv6. This filters the result from
+ # getaddrinfo() based on the source_address value.
+ # This is based on the cpython socket.create_connection() function.
+ # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
+ def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
+ host, port = address
+ err = None
+ addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
+ af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
+ ip_addrs = [addr for addr in addrs if addr[0] == af]
+ if addrs and not ip_addrs:
+ ip_version = 'v4' if af == socket.AF_INET else 'v6'
+ raise socket.error(
+ "No remote IP%s addresses available for connect, can't use '%s' as source address"
+ % (ip_version, source_address[0]))
+ for res in ip_addrs:
+ af, socktype, proto, canonname, sa = res
+ sock = None
+ try:
+ sock = socket.socket(af, socktype, proto)
+ if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+ sock.settimeout(timeout)
+ sock.bind(source_address)
+ sock.connect(sa)
+ err = None # Explicitly break reference cycle
+ return sock
+ except socket.error as _:
+ err = _
+ if sock is not None:
+ sock.close()
+ if err is not None:
+ raise err
+ else:
+ raise socket.error('getaddrinfo returns an empty list')
+ if hasattr(hc, '_create_connection'):
+ hc._create_connection = _create_connection
+ sa = (source_address, 0)
+ if hasattr(hc, 'source_address'): # Python 2.7+
+ hc.source_address = sa
+ else: # Python 2.6
+ def _hc_connect(self, *args, **kwargs):
+ sock = _create_connection(
+ (self.host, self.port), self.timeout, sa)
+ if is_https:
+ self.sock = ssl.wrap_socket(
+ sock, self.key_file, self.cert_file,
+ ssl_version=ssl.PROTOCOL_TLSv1)
+ else:
+ self.sock = sock
+ hc.connect = functools.partial(_hc_connect, hc)
+
+ return hc
+
+
+def handle_youtubedl_headers(headers):
+ filtered_headers = headers
+
+ if 'Youtubedl-no-compression' in filtered_headers:
+ filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
+ del filtered_headers['Youtubedl-no-compression']
+
+ return filtered_headers
+
+
+class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
+ """Handler for HTTP requests and responses.
+
+ This class, when installed with an OpenerDirector, automatically adds
+ the standard headers to every HTTP request and handles gzipped and
+ deflated responses from web servers. If compression is to be avoided in
+ a particular request, the original request in the program code only has
+ to include the HTTP header "Youtubedl-no-compression", which will be
+ removed before making the real request.
+
+ Part of this code was copied from:
+
+ http://techknack.net/python-urllib2-handlers/
+
+ Andrew Rowls, the author of that code, agreed to release it to the
+ public domain.
+ """
+
+ def __init__(self, params, *args, **kwargs):
+ compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
+ self._params = params
+
+ def http_open(self, req):
+ conn_class = compat_http_client.HTTPConnection
+
+ socks_proxy = req.headers.get('Ytdl-socks-proxy')
+ if socks_proxy:
+ conn_class = make_socks_conn_class(conn_class, socks_proxy)
+ del req.headers['Ytdl-socks-proxy']
+
+ return self.do_open(functools.partial(
+ _create_http_connection, self, conn_class, False),
+ req)
+
+ @staticmethod
+ def deflate(data):
+ try:
+ return zlib.decompress(data, -zlib.MAX_WBITS)
+ except zlib.error:
+ return zlib.decompress(data)
+
+ def http_request(self, req):
+ # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this
+ # is not always respected by websites, and some give out URLs with non-percent-encoded
+ # non-ASCII characters (see telemb.py, ard.py [#3412])
+ # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
+ # To work around aforementioned issue we will replace request's original URL with
+ # percent-encoded one
+ # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
+ # the code of this workaround has been moved here from YoutubeDL.urlopen()
+ url = req.get_full_url()
+ url_escaped = escape_url(url)
+
+ # Substitute URL if any change after escaping
+ if url != url_escaped:
+ req = update_Request(req, url=url_escaped)
+
+ for h, v in std_headers.items():
+ # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
+ # The dict keys are capitalized because of this bug by urllib
+ if h.capitalize() not in req.headers:
+ req.add_header(h, v)
+
+ req.headers = handle_youtubedl_headers(req.headers)
+
+ if sys.version_info < (2, 7) and '#' in req.get_full_url():
+ # Python 2.6 is brain-dead when it comes to fragments
+ req._Request__original = req._Request__original.partition('#')[0]
+ req._Request__r_type = req._Request__r_type.partition('#')[0]
+
+ return req
+
+ def http_response(self, req, resp):
+ old_resp = resp
+ # gzip
+ if resp.headers.get('Content-encoding', '') == 'gzip':
+ content = resp.read()
+ gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
+ try:
+ uncompressed = io.BytesIO(gz.read())
+ except IOError as original_ioerror:
+ # There may be junk at the end of the file
+ # See http://stackoverflow.com/q/4928560/35070 for details
+ for i in range(1, 1024):
+ try:
+ gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
+ uncompressed = io.BytesIO(gz.read())
+ except IOError:
+ continue
+ break
+ else:
+ raise original_ioerror
+ resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
+ resp.msg = old_resp.msg
+ del resp.headers['Content-encoding']
+ # deflate
+ if resp.headers.get('Content-encoding', '') == 'deflate':
+ gz = io.BytesIO(self.deflate(resp.read()))
+ resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+ resp.msg = old_resp.msg
+ del resp.headers['Content-encoding']
+ # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
+ # https://github.com/ytdl-org/youtube-dl/issues/6457).
+ if 300 <= resp.code < 400:
+ location = resp.headers.get('Location')
+ if location:
+ # Per RFC 2616 the default charset is iso-8859-1, which is respected by Python 3
+ if sys.version_info >= (3, 0):
+ location = location.encode('iso-8859-1').decode('utf-8')
+ else:
+ location = location.decode('utf-8')
+ location_escaped = escape_url(location)
+ if location != location_escaped:
+ del resp.headers['Location']
+ if sys.version_info < (3, 0):
+ location_escaped = location_escaped.encode('utf-8')
+ resp.headers['Location'] = location_escaped
+ return resp
+
+ https_request = http_request
+ https_response = http_response
+
+
+def make_socks_conn_class(base_class, socks_proxy):
+ assert issubclass(base_class, (
+ compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
+
+ url_components = compat_urlparse.urlparse(socks_proxy)
+ if url_components.scheme.lower() == 'socks5':
+ socks_type = ProxyType.SOCKS5
+ elif url_components.scheme.lower() in ('socks', 'socks4'):
+ socks_type = ProxyType.SOCKS4
+ elif url_components.scheme.lower() == 'socks4a':
+ socks_type = ProxyType.SOCKS4A
+ else:
+ raise ValueError('Unsupported SOCKS proxy scheme: %s' % url_components.scheme)
+
+ def unquote_if_non_empty(s):
+ if not s:
+ return s
+ return compat_urllib_parse_unquote_plus(s)
+
+ proxy_args = (
+ socks_type,
+ url_components.hostname, url_components.port or 1080,
+ True, # Remote DNS
+ unquote_if_non_empty(url_components.username),
+ unquote_if_non_empty(url_components.password),
+ )
+
+ class SocksConnection(base_class):
+ def connect(self):
+ self.sock = sockssocket()
+ self.sock.setproxy(*proxy_args)
+ if type(self.timeout) in (int, float):
+ self.sock.settimeout(self.timeout)
+ self.sock.connect((self.host, self.port))
+
+ if isinstance(self, compat_http_client.HTTPSConnection):
+ if hasattr(self, '_context'): # Python > 2.6
+ self.sock = self._context.wrap_socket(
+ self.sock, server_hostname=self.host)
+ else:
+ self.sock = ssl.wrap_socket(self.sock)
+
+ return SocksConnection
+
+
+class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
+ def __init__(self, params, https_conn_class=None, *args, **kwargs):
+ compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
+ self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
+ self._params = params
+
+ def https_open(self, req):
+ kwargs = {}
+ conn_class = self._https_conn_class
+
+ if hasattr(self, '_context'): # python > 2.6
+ kwargs['context'] = self._context
+ if hasattr(self, '_check_hostname'): # python 3.x
+ kwargs['check_hostname'] = self._check_hostname
+
+ socks_proxy = req.headers.get('Ytdl-socks-proxy')
+ if socks_proxy:
+ conn_class = make_socks_conn_class(conn_class, socks_proxy)
+ del req.headers['Ytdl-socks-proxy']
+
+ return self.do_open(functools.partial(
+ _create_http_connection, self, conn_class, True),
+ req, **kwargs)
+
+
+class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
+ """
+ See [1] for cookie file format.
+
+ 1. https://curl.haxx.se/docs/http-cookies.html
+ """
+ _HTTPONLY_PREFIX = '#HttpOnly_'
+ _ENTRY_LEN = 7
+ _HEADER = '''# Netscape HTTP Cookie File
+# This file is generated by hypervideo. Do not edit.
+
+'''
+ _CookieFileEntry = collections.namedtuple(
+ 'CookieFileEntry',
+ ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """
+ Save cookies to a file.
+
+ Most of the code is taken from CPython 3.8 and slightly adapted
+ to support cookie files with UTF-8 in both python 2 and 3.
+ """
+ if filename is None:
+ if self.filename is not None:
+ filename = self.filename
+ else:
+ raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+
+ # Store session cookies with `expires` set to 0 instead of an empty
+ # string
+ for cookie in self:
+ if cookie.expires is None:
+ cookie.expires = 0
+
+ with io.open(filename, 'w', encoding='utf-8') as f:
+ f.write(self._HEADER)
+ now = time.time()
+ for cookie in self:
+ if not ignore_discard and cookie.discard:
+ continue
+ if not ignore_expires and cookie.is_expired(now):
+ continue
+ if cookie.secure:
+ secure = 'TRUE'
+ else:
+ secure = 'FALSE'
+ if cookie.domain.startswith('.'):
+ initial_dot = 'TRUE'
+ else:
+ initial_dot = 'FALSE'
+ if cookie.expires is not None:
+ expires = compat_str(cookie.expires)
+ else:
+ expires = ''
+ if cookie.value is None:
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas http.cookiejar regards it as a
+ # cookie with no value.
+ name = ''
+ value = cookie.name
+ else:
+ name = cookie.name
+ value = cookie.value
+ f.write(
+ '\t'.join([cookie.domain, initial_dot, cookie.path,
+ secure, expires, name, value]) + '\n')
+
+ def load(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """Load cookies from a file."""
+ if filename is None:
+ if self.filename is not None:
+ filename = self.filename
+ else:
+ raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+
+ def prepare_line(line):
+ if line.startswith(self._HTTPONLY_PREFIX):
+ line = line[len(self._HTTPONLY_PREFIX):]
+ # comments and empty lines are fine
+ if line.startswith('#') or not line.strip():
+ return line
+ cookie_list = line.split('\t')
+ if len(cookie_list) != self._ENTRY_LEN:
+ raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
+ cookie = self._CookieFileEntry(*cookie_list)
+ if cookie.expires_at and not cookie.expires_at.isdigit():
+ raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+ return line
+
+ cf = io.StringIO()
+ with io.open(filename, encoding='utf-8') as f:
+ for line in f:
+ try:
+ cf.write(prepare_line(line))
+ except compat_cookiejar.LoadError as e:
+ write_string(
+ 'WARNING: skipping cookie file entry due to %s: %r\n'
+ % (e, line), sys.stderr)
+ continue
+ cf.seek(0)
+ self._really_load(cf, filename, ignore_discard, ignore_expires)
+ # Session cookies are denoted by the `expires` field set to either
+ # an empty string or 0. MozillaCookieJar only recognizes the former
+ # (see [1]), so we need to force the latter to be recognized as
+ # session cookies on our own.
+ # Session cookies may be important for cookie-based authentication:
+ # e.g. when a user does not tick the 'Remember me' box while logging
+ # in on a site, some important cookies are stored as session cookies,
+ # and failing to recognize them will result in a failed login.
+ # 1. https://bugs.python.org/issue17164
+ for cookie in self:
+ # Treat `expires=0` cookies as session cookies
+ if cookie.expires == 0:
+ cookie.expires = None
+ cookie.discard = True
+
+
+class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
+ def __init__(self, cookiejar=None):
+ compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
+
+ def http_response(self, request, response):
+ # Python 2 will choke on the next HTTP request in a row if there are
+ # non-ASCII characters in the Set-Cookie HTTP header of the last
+ # response (see https://github.com/ytdl-org/youtube-dl/issues/6769).
+ # In order to at least prevent crashing we will percent encode Set-Cookie
+ # header before HTTPCookieProcessor starts processing it.
+ # if sys.version_info < (3, 0) and response.headers:
+ # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
+ # set_cookie = response.headers.get(set_cookie_header)
+ # if set_cookie:
+ # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
+ # if set_cookie != set_cookie_escaped:
+ # del response.headers[set_cookie_header]
+ # response.headers[set_cookie_header] = set_cookie_escaped
+ return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
+
+ https_request = compat_urllib_request.HTTPCookieProcessor.http_request
+ https_response = http_response
+
+
+class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
+ """YoutubeDL redirect handler
+
+ The code is based on HTTPRedirectHandler implementation from CPython [1].
+
+ This redirect handler solves two issues:
+ - ensures redirect URL is always unicode under python 2
+ - introduces support for experimental HTTP response status code
+ 308 Permanent Redirect [2] used by some sites [3]
+
+ 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
+ 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
+ 3. https://github.com/ytdl-org/youtube-dl/issues/28768
+ """
+
+ http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
+
+ def redirect_request(self, req, fp, code, msg, headers, newurl):
+ """Return a Request or None in response to a redirect.
+
+ This is called by the http_error_30x methods when a
+ redirection response is received. If a redirection should
+ take place, return a new Request to allow http_error_30x to
+ perform the redirect. Otherwise, raise HTTPError if no-one
+ else should try to handle this url. Return None if you can't
+ but another Handler might.
+ """
+ m = req.get_method()
+ if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
+ or code in (301, 302, 303) and m == "POST")):
+ raise compat_HTTPError(req.full_url, code, msg, headers, fp)
+ # Strictly (according to RFC 2616), 301 or 302 in response to
+ # a POST MUST NOT cause a redirection without confirmation
+ # from the user (of urllib.request, in this case). In practice,
+ # essentially all clients do redirect in this case, so we do
+ # the same.
+
+ # On python 2 urlh.geturl() may sometimes return redirect URL
+ # as byte string instead of unicode. This workaround allows
+ # to force it always return unicode.
+ if sys.version_info[0] < 3:
+ newurl = compat_str(newurl)
+
+ # Be conciliant with URIs containing a space. This is mainly
+ # redundant with the more complete encoding done in http_error_302(),
+ # but it is kept for compatibility with other callers.
+ newurl = newurl.replace(' ', '%20')
+
+ CONTENT_HEADERS = ("content-length", "content-type")
+ # NB: don't use dict comprehension for python 2.6 compatibility
+ newheaders = dict((k, v) for k, v in req.headers.items()
+ if k.lower() not in CONTENT_HEADERS)
+ return compat_urllib_request.Request(
+ newurl, headers=newheaders, origin_req_host=req.origin_req_host,
+ unverifiable=True)
+
+
+def extract_timezone(date_str):
+ m = re.search(
+ r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
+ date_str)
+ if not m:
+ timezone = datetime.timedelta()
+ else:
+ date_str = date_str[:-len(m.group('tz'))]
+ if not m.group('sign'):
+ timezone = datetime.timedelta()
+ else:
+ sign = 1 if m.group('sign') == '+' else -1
+ timezone = datetime.timedelta(
+ hours=sign * int(m.group('hours')),
+ minutes=sign * int(m.group('minutes')))
+ return timezone, date_str
+
+
+def parse_iso8601(date_str, delimiter='T', timezone=None):
+ """ Return a UNIX timestamp from the given date """
+
+ if date_str is None:
+ return None
+
+ date_str = re.sub(r'\.[0-9]+', '', date_str)
+
+ if timezone is None:
+ timezone, date_str = extract_timezone(date_str)
+
+ try:
+ date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
+ dt = datetime.datetime.strptime(date_str, date_format) - timezone
+ return calendar.timegm(dt.timetuple())
+ except ValueError:
+ pass
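+
+# Both spellings below denote the same instant and should yield the same
+# epoch value (worked out by hand from the arithmetic above):
+#   >>> parse_iso8601('2021-06-09T17:54:27+02:00')
+#   1623254067
+#   >>> parse_iso8601('2021-06-09T15:54:27Z')
+#   1623254067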
+
+
+def date_formats(day_first=True):
+ return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
+
+
+def unified_strdate(date_str, day_first=True):
+ """Return a string with the date in the format YYYYMMDD"""
+
+ if date_str is None:
+ return None
+ upload_date = None
+ # Replace commas
+ date_str = date_str.replace(',', ' ')
+ # Remove AM/PM + timezone
+ date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
+ _, date_str = extract_timezone(date_str)
+
+ for expression in date_formats(day_first):
+ try:
+ upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
+ except ValueError:
+ pass
+ if upload_date is None:
+ timetuple = email.utils.parsedate_tz(date_str)
+ if timetuple:
+ try:
+ upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
+ except ValueError:
+ pass
+ if upload_date is not None:
+ return compat_str(upload_date)
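+
+# day_first resolves ambiguous all-numeric dates:
+#   >>> unified_strdate('02/03/2019', day_first=True)
+#   '20190302'
+#   >>> unified_strdate('02/03/2019', day_first=False)
+#   '20190203'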
+
+
+def unified_timestamp(date_str, day_first=True):
+ if date_str is None:
+ return None
+
+ date_str = re.sub(r'[,|]', '', date_str)
+
+ pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
+ timezone, date_str = extract_timezone(date_str)
+
+ # Remove AM/PM + timezone
+ date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
+
+ # Remove unrecognized timezones from ISO 8601-like timestamps
+ m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
+ if m:
+ date_str = date_str[:-len(m.group('tz'))]
+
+ # Python only supports microseconds, so remove nanoseconds
+ m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
+ if m:
+ date_str = m.group(1)
+
+ for expression in date_formats(day_first):
+ try:
+ dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
+ return calendar.timegm(dt.timetuple())
+ except ValueError:
+ pass
+ timetuple = email.utils.parsedate_tz(date_str)
+ if timetuple:
+ return calendar.timegm(timetuple) + pm_delta * 3600
+
+
+def determine_ext(url, default_ext='unknown_video'):
+ if url is None or '.' not in url:
+ return default_ext
+ guess = url.partition('?')[0].rpartition('.')[2]
+ if re.match(r'^[A-Za-z0-9]+$', guess):
+ return guess
+ # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
+ elif guess.rstrip('/') in KNOWN_EXTENSIONS:
+ return guess.rstrip('/')
+ else:
+ return default_ext
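+
+# e.g.:
+#   >>> determine_ext('http://example.com/foo/bar.mp4?download=1')
+#   'mp4'
+#   >>> determine_ext('http://example.com/stream.m3u8/')
+#   'm3u8'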
+
+
+def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
+ return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
+
+
+def date_from_str(date_str):
+ """
+ Return a datetime.date object from a string in the format YYYYMMDD or
+ (now|today)[+-][0-9]+(day|week|month|year)(s)?"""
+ today = datetime.date.today()
+ if date_str in ('now', 'today'):
+ return today
+ if date_str == 'yesterday':
+ return today - datetime.timedelta(days=1)
+ match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
+ if match is not None:
+ sign = match.group('sign')
+ time = int(match.group('time'))
+ if sign == '-':
+ time = -time
+ unit = match.group('unit')
+ # A bad approximation?
+ if unit == 'month':
+ unit = 'day'
+ time *= 30
+ elif unit == 'year':
+ unit = 'day'
+ time *= 365
+ unit += 's'
+ delta = datetime.timedelta(**{unit: time})
+ return today + delta
+ return datetime.datetime.strptime(date_str, '%Y%m%d').date()
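+
+# Examples of the relative form (months/years are approximated as 30/365
+# days, per the code above):
+#   date_from_str('now-1week') == today - 7 days
+#   date_from_str('today+2months') == today + 60 days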
+
+
+def hyphenate_date(date_str):
+ """
+ Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
+ match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
+ if match is not None:
+ return '-'.join(match.groups())
+ else:
+ return date_str
+
+
+class DateRange(object):
+ """Represents a time interval between two dates"""
+
+ def __init__(self, start=None, end=None):
+ """start and end must be strings in the format accepted by date"""
+ if start is not None:
+ self.start = date_from_str(start)
+ else:
+ self.start = datetime.datetime.min.date()
+ if end is not None:
+ self.end = date_from_str(end)
+ else:
+ self.end = datetime.datetime.max.date()
+ if self.start > self.end:
+ raise ValueError('Date range: "%s", the start date must be before the end date' % self)
+
+ @classmethod
+ def day(cls, day):
+ """Returns a range that only contains the given day"""
+ return cls(day, day)
+
+ def __contains__(self, date):
+ """Check if the date is in the range"""
+ if not isinstance(date, datetime.date):
+ date = date_from_str(date)
+ return self.start <= date <= self.end
+
+ def __str__(self):
+ return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
+
+
+def platform_name():
+ """ Returns the platform name as a compat_str """
+ res = platform.platform()
+ if isinstance(res, bytes):
+ res = res.decode(preferredencoding())
+
+ assert isinstance(res, compat_str)
+ return res
+
+
+def _windows_write_string(s, out):
+ """ Returns True if the string was written using special methods,
+ False if it has yet to be written out."""
+ # Adapted from http://stackoverflow.com/a/3259271/35070
+
+ import ctypes
+ import ctypes.wintypes
+
+ WIN_OUTPUT_IDS = {
+ 1: -11,
+ 2: -12,
+ }
+
+ try:
+ fileno = out.fileno()
+ except AttributeError:
+ # If the output stream doesn't have a fileno, it's virtual
+ return False
+ except io.UnsupportedOperation:
+ # Some strange Windows pseudo files?
+ return False
+ if fileno not in WIN_OUTPUT_IDS:
+ return False
+
+ GetStdHandle = compat_ctypes_WINFUNCTYPE(
+ ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
+ ('GetStdHandle', ctypes.windll.kernel32))
+ h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
+
+ WriteConsoleW = compat_ctypes_WINFUNCTYPE(
+ ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
+ ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
+ ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
+ written = ctypes.wintypes.DWORD(0)
+
+ GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
+ FILE_TYPE_CHAR = 0x0002
+ FILE_TYPE_REMOTE = 0x8000
+ GetConsoleMode = compat_ctypes_WINFUNCTYPE(
+ ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
+ ctypes.POINTER(ctypes.wintypes.DWORD))(
+ ('GetConsoleMode', ctypes.windll.kernel32))
+ INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
+
+ def not_a_console(handle):
+ if handle == INVALID_HANDLE_VALUE or handle is None:
+ return True
+ return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
+ or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+
+ if not_a_console(h):
+ return False
+
+ def next_nonbmp_pos(s):
+ try:
+ return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
+ except StopIteration:
+ return len(s)
+
+ while s:
+ count = min(next_nonbmp_pos(s), 1024)
+
+ ret = WriteConsoleW(
+ h, s, count if count else 2, ctypes.byref(written), None)
+ if ret == 0:
+ raise OSError('Failed to write string')
+ if not count: # We just wrote a non-BMP character
+ assert written.value == 2
+ s = s[1:]
+ else:
+ assert written.value > 0
+ s = s[written.value:]
+ return True
+
+
+def write_string(s, out=None, encoding=None):
+ if out is None:
+ out = sys.stderr
+ assert type(s) == compat_str
+
+ if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
+ if _windows_write_string(s, out):
+ return
+
+ if ('b' in getattr(out, 'mode', '')
+ or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
+ byt = s.encode(encoding or preferredencoding(), 'ignore')
+ out.write(byt)
+ elif hasattr(out, 'buffer'):
+ enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
+ byt = s.encode(enc, 'ignore')
+ out.buffer.write(byt)
+ else:
+ out.write(s)
+ out.flush()
+
+
+def bytes_to_intlist(bs):
+ if not bs:
+ return []
+ if isinstance(bs[0], int): # Python 3
+ return list(bs)
+ else:
+ return [ord(c) for c in bs]
+
+
+def intlist_to_bytes(xs):
+ if not xs:
+ return b''
+ return compat_struct_pack('%dB' % len(xs), *xs)
+
+
+# Cross-platform file locking
+if sys.platform == 'win32':
+ import ctypes.wintypes
+ import msvcrt
+
+ class OVERLAPPED(ctypes.Structure):
+ _fields_ = [
+ ('Internal', ctypes.wintypes.LPVOID),
+ ('InternalHigh', ctypes.wintypes.LPVOID),
+ ('Offset', ctypes.wintypes.DWORD),
+ ('OffsetHigh', ctypes.wintypes.DWORD),
+ ('hEvent', ctypes.wintypes.HANDLE),
+ ]
+
+ kernel32 = ctypes.windll.kernel32
+ LockFileEx = kernel32.LockFileEx
+ LockFileEx.argtypes = [
+ ctypes.wintypes.HANDLE, # hFile
+ ctypes.wintypes.DWORD, # dwFlags
+ ctypes.wintypes.DWORD, # dwReserved
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
+ ctypes.POINTER(OVERLAPPED) # Overlapped
+ ]
+ LockFileEx.restype = ctypes.wintypes.BOOL
+ UnlockFileEx = kernel32.UnlockFileEx
+ UnlockFileEx.argtypes = [
+ ctypes.wintypes.HANDLE, # hFile
+ ctypes.wintypes.DWORD, # dwReserved
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
+ ctypes.POINTER(OVERLAPPED) # Overlapped
+ ]
+ UnlockFileEx.restype = ctypes.wintypes.BOOL
+ whole_low = 0xffffffff
+ whole_high = 0x7fffffff
+
+ def _lock_file(f, exclusive):
+ overlapped = OVERLAPPED()
+ overlapped.Offset = 0
+ overlapped.OffsetHigh = 0
+ overlapped.hEvent = 0
+ f._lock_file_overlapped_p = ctypes.pointer(overlapped)
+ handle = msvcrt.get_osfhandle(f.fileno())
+ if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
+ whole_low, whole_high, f._lock_file_overlapped_p):
+ raise OSError('Locking file failed: %r' % ctypes.FormatError())
+
+ def _unlock_file(f):
+ assert f._lock_file_overlapped_p
+ handle = msvcrt.get_osfhandle(f.fileno())
+ if not UnlockFileEx(handle, 0,
+ whole_low, whole_high, f._lock_file_overlapped_p):
+ raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
+
+else:
+ # Some platforms, such as Jython, are missing fcntl
+ try:
+ import fcntl
+
+ def _lock_file(f, exclusive):
+ fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+
+ def _unlock_file(f):
+ fcntl.flock(f, fcntl.LOCK_UN)
+ except ImportError:
+ UNSUPPORTED_MSG = 'file locking is not supported on this platform'
+
+ def _lock_file(f, exclusive):
+ raise IOError(UNSUPPORTED_MSG)
+
+ def _unlock_file(f):
+ raise IOError(UNSUPPORTED_MSG)
+
+
+class locked_file(object):
+ def __init__(self, filename, mode, encoding=None):
+ assert mode in ['r', 'a', 'w']
+ self.f = io.open(filename, mode, encoding=encoding)
+ self.mode = mode
+
+ def __enter__(self):
+ exclusive = self.mode != 'r'
+ try:
+ _lock_file(self.f, exclusive)
+ except IOError:
+ self.f.close()
+ raise
+ return self
+
+ def __exit__(self, etype, value, traceback):
+ try:
+ _unlock_file(self.f)
+ finally:
+ self.f.close()
+
+ def __iter__(self):
+ return iter(self.f)
+
+ def write(self, *args):
+ return self.f.write(*args)
+
+ def read(self, *args):
+ return self.f.read(*args)
+
+
+def get_filesystem_encoding():
+ encoding = sys.getfilesystemencoding()
+ return encoding if encoding is not None else 'utf-8'
+
+
+def shell_quote(args):
+ quoted_args = []
+ encoding = get_filesystem_encoding()
+ for a in args:
+ if isinstance(a, bytes):
+ # We may get a filename encoded with 'encodeFilename'
+ a = a.decode(encoding)
+ quoted_args.append(compat_shlex_quote(a))
+ return ' '.join(quoted_args)
+
+
+def smuggle_url(url, data):
+ """ Pass additional data in a URL for internal use. """
+
+ url, idata = unsmuggle_url(url, {})
+ data.update(idata)
+ sdata = compat_urllib_parse_urlencode(
+ {'__youtubedl_smuggle': json.dumps(data)})
+ return url + '#' + sdata
+
+
+def unsmuggle_url(smug_url, default=None):
+ if '#__youtubedl_smuggle' not in smug_url:
+ return smug_url, default
+ url, _, sdata = smug_url.rpartition('#')
+ jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
+ data = json.loads(jsond)
+ return url, data
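+
+# Round trip (illustrative):
+#   >>> url = smuggle_url('http://example.com/v', {'referer': 'x'})
+#   >>> unsmuggle_url(url, {})
+#   ('http://example.com/v', {'referer': 'x'})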
+
+
+def format_bytes(bytes):
+ if bytes is None:
+ return 'N/A'
+ if type(bytes) is str:
+ bytes = float(bytes)
+ if bytes == 0.0:
+ exponent = 0
+ else:
+ exponent = int(math.log(bytes, 1024.0))
+ suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
+ converted = float(bytes) / float(1024 ** exponent)
+ return '%.2f%s' % (converted, suffix)
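+
+# e.g. format_bytes(1536) == '1.50KiB' and format_bytes(None) == 'N/A'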
+
+
+def lookup_unit_table(unit_table, s):
+ units_re = '|'.join(re.escape(u) for u in unit_table)
+ m = re.match(
+ r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
+ if not m:
+ return None
+ num_str = m.group('num').replace(',', '.')
+ mult = unit_table[m.group('unit')]
+ return int(float(num_str) * mult)
+
+
+def parse_filesize(s):
+ if s is None:
+ return None
+
+ # The lower-case forms are of course incorrect and unofficial,
+ # but we support those too
+ _UNIT_TABLE = {
+ 'B': 1,
+ 'b': 1,
+ 'bytes': 1,
+ 'KiB': 1024,
+ 'KB': 1000,
+ 'kB': 1024,
+ 'Kb': 1000,
+ 'kb': 1000,
+ 'kilobytes': 1000,
+ 'kibibytes': 1024,
+ 'MiB': 1024 ** 2,
+ 'MB': 1000 ** 2,
+ 'mB': 1024 ** 2,
+ 'Mb': 1000 ** 2,
+ 'mb': 1000 ** 2,
+ 'megabytes': 1000 ** 2,
+ 'mebibytes': 1024 ** 2,
+ 'GiB': 1024 ** 3,
+ 'GB': 1000 ** 3,
+ 'gB': 1024 ** 3,
+ 'Gb': 1000 ** 3,
+ 'gb': 1000 ** 3,
+ 'gigabytes': 1000 ** 3,
+ 'gibibytes': 1024 ** 3,
+ 'TiB': 1024 ** 4,
+ 'TB': 1000 ** 4,
+ 'tB': 1024 ** 4,
+ 'Tb': 1000 ** 4,
+ 'tb': 1000 ** 4,
+ 'terabytes': 1000 ** 4,
+ 'tebibytes': 1024 ** 4,
+ 'PiB': 1024 ** 5,
+ 'PB': 1000 ** 5,
+ 'pB': 1024 ** 5,
+ 'Pb': 1000 ** 5,
+ 'pb': 1000 ** 5,
+ 'petabytes': 1000 ** 5,
+ 'pebibytes': 1024 ** 5,
+ 'EiB': 1024 ** 6,
+ 'EB': 1000 ** 6,
+ 'eB': 1024 ** 6,
+ 'Eb': 1000 ** 6,
+ 'eb': 1000 ** 6,
+ 'exabytes': 1000 ** 6,
+ 'exbibytes': 1024 ** 6,
+ 'ZiB': 1024 ** 7,
+ 'ZB': 1000 ** 7,
+ 'zB': 1024 ** 7,
+ 'Zb': 1000 ** 7,
+ 'zb': 1000 ** 7,
+ 'zettabytes': 1000 ** 7,
+ 'zebibytes': 1024 ** 7,
+ 'YiB': 1024 ** 8,
+ 'YB': 1000 ** 8,
+ 'yB': 1024 ** 8,
+ 'Yb': 1000 ** 8,
+ 'yb': 1000 ** 8,
+ 'yottabytes': 1000 ** 8,
+ 'yobibytes': 1024 ** 8,
+ }
+
+ return lookup_unit_table(_UNIT_TABLE, s)
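+
+# Binary vs decimal prefixes, per the table above:
+#   >>> parse_filesize('1.5GiB')
+#   1610612736
+#   >>> parse_filesize('500MB')
+#   500000000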
+
+
+def parse_count(s):
+ if s is None:
+ return None
+
+ s = s.strip()
+
+ if re.match(r'^[\d,.]+$', s):
+ return str_to_int(s)
+
+ _UNIT_TABLE = {
+ 'k': 1000,
+ 'K': 1000,
+ 'm': 1000 ** 2,
+ 'M': 1000 ** 2,
+ 'kk': 1000 ** 2,
+ 'KK': 1000 ** 2,
+ }
+
+ return lookup_unit_table(_UNIT_TABLE, s)
+
+
+def parse_resolution(s):
+ if s is None:
+ return {}
+
+ mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
+ if mobj:
+ return {
+ 'width': int(mobj.group('w')),
+ 'height': int(mobj.group('h')),
+ }
+
+ mobj = re.search(r'\b(\d+)[pPiI]\b', s)
+ if mobj:
+ return {'height': int(mobj.group(1))}
+
+ mobj = re.search(r'\b([48])[kK]\b', s)
+ if mobj:
+ return {'height': int(mobj.group(1)) * 540}
+
+ return {}
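+
+# Illustrative values:
+#   parse_resolution('1920x1080')  # -> {'width': 1920, 'height': 1080}
+#   parse_resolution('720p')       # -> {'height': 720}
+#   parse_resolution('4K')         # -> {'height': 2160}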
+
+
+def parse_bitrate(s):
+ if not isinstance(s, compat_str):
+ return
+ mobj = re.search(r'\b(\d+)\s*kbps', s)
+ if mobj:
+ return int(mobj.group(1))
+
+
+def month_by_name(name, lang='en'):
+ """ Return the number of a month by (locale-independently) English name """
+
+ month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
+
+ try:
+ return month_names.index(name) + 1
+ except ValueError:
+ return None
+
+
+def month_by_abbreviation(abbrev):
+ """ Return the number of a month by (locale-independently) English
+ abbreviations """
+
+ try:
+ return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
+ except ValueError:
+ return None
+
+
+def fix_xml_ampersands(xml_str):
+ """Replace all the '&' by '&amp;' in XML"""
+ return re.sub(
+ r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
+ '&amp;',
+ xml_str)
+
+
+def setproctitle(title):
+ assert isinstance(title, compat_str)
+
+ # ctypes in Jython is not complete
+ # http://bugs.jython.org/issue2148
+ if sys.platform.startswith('java'):
+ return
+
+ try:
+ libc = ctypes.cdll.LoadLibrary('libc.so.6')
+ except OSError:
+ return
+ except TypeError:
+ # LoadLibrary in Windows Python 2.7.13 only expects
+ # a bytestring, but since unicode_literals turns
+ # every string into a unicode string, it fails.
+ return
+ title_bytes = title.encode('utf-8')
+ buf = ctypes.create_string_buffer(len(title_bytes))
+ buf.value = title_bytes
+ try:
+ libc.prctl(15, buf, 0, 0, 0)
+ except AttributeError:
+ return # Strange libc, just skip this
+
+
+def remove_start(s, start):
+ return s[len(start):] if s is not None and s.startswith(start) else s
+
+
+def remove_end(s, end):
+ return s[:-len(end)] if s is not None and s.endswith(end) else s
+
+
+def remove_quotes(s):
+ if s is None or len(s) < 2:
+ return s
+ for quote in ('"', "'", ):
+ if s[0] == quote and s[-1] == quote:
+ return s[1:-1]
+ return s
+
+
+def url_basename(url):
+ path = compat_urlparse.urlparse(url).path
+ return path.strip('/').split('/')[-1]
+
+
+def base_url(url):
+ return re.match(r'https?://[^?#&]+/', url).group()
+
+
+def urljoin(base, path):
+ if isinstance(path, bytes):
+ path = path.decode('utf-8')
+ if not isinstance(path, compat_str) or not path:
+ return None
+ if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
+ return path
+ if isinstance(base, bytes):
+ base = base.decode('utf-8')
+ if not isinstance(base, compat_str) or not re.match(
+ r'^(?:https?:)?//', base):
+ return None
+ return compat_urlparse.urljoin(base, path)
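+
+# Illustrative behaviour:
+#   urljoin('https://example.com/a/', 'b.mp4')  # -> 'https://example.com/a/b.mp4'
+#   urljoin('https://example.com/a/', '//cdn.example.com/b.mp4')
+#   # -> '//cdn.example.com/b.mp4' (already absolute, returned as-is)
+#   urljoin('not a base url', 'b.mp4')          # -> None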
+
+
+class HEADRequest(compat_urllib_request.Request):
+ def get_method(self):
+ return 'HEAD'
+
+
+class PUTRequest(compat_urllib_request.Request):
+ def get_method(self):
+ return 'PUT'
+
+
+def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
+ if get_attr:
+ if v is not None:
+ v = getattr(v, get_attr, None)
+ if v == '':
+ v = None
+ if v is None:
+ return default
+ try:
+ return int(v) * invscale // scale
+ except (ValueError, TypeError):
+ return default
+
+
+def str_or_none(v, default=None):
+ return default if v is None else compat_str(v)
+
+
+def str_to_int(int_str):
+ """ A more relaxed version of int_or_none """
+ if isinstance(int_str, compat_integer_types):
+ return int_str
+ elif isinstance(int_str, compat_str):
+ int_str = re.sub(r'[,\.\+]', '', int_str)
+ return int_or_none(int_str)
+
+
+def float_or_none(v, scale=1, invscale=1, default=None):
+ if v is None:
+ return default
+ try:
+ return float(v) * invscale / scale
+ except (ValueError, TypeError):
+ return default
+
+
+def bool_or_none(v, default=None):
+ return v if isinstance(v, bool) else default
+
+
+def strip_or_none(v, default=None):
+ return v.strip() if isinstance(v, compat_str) else default
+
+
+def url_or_none(url):
+ if not url or not isinstance(url, compat_str):
+ return None
+ url = url.strip()
+ return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
+
+
+def parse_duration(s):
+ if not isinstance(s, compat_basestring):
+ return None
+
+ s = s.strip()
+
+ days, hours, mins, secs, ms = [None] * 5
+ m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
+ if m:
+ days, hours, mins, secs, ms = m.groups()
+ else:
+ m = re.match(
+ r'''(?ix)(?:P?
+ (?:
+ [0-9]+\s*y(?:ears?)?\s*
+ )?
+ (?:
+ [0-9]+\s*m(?:onths?)?\s*
+ )?
+ (?:
+ [0-9]+\s*w(?:eeks?)?\s*
+ )?
+ (?:
+ (?P<days>[0-9]+)\s*d(?:ays?)?\s*
+ )?
+ T)?
+ (?:
+ (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
+ )?
+ (?:
+ (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
+ )?
+ (?:
+ (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
+ )?Z?$''', s)
+ if m:
+ days, hours, mins, secs, ms = m.groups()
+ else:
+ m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
+ if m:
+ hours, mins = m.groups()
+ else:
+ return None
+
+ duration = 0
+ if secs:
+ duration += float(secs)
+ if mins:
+ duration += float(mins) * 60
+ if hours:
+ duration += float(hours) * 60 * 60
+ if days:
+ duration += float(days) * 24 * 60 * 60
+ if ms:
+ duration += float(ms)
+ return duration
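+
+# Illustrative values:
+#   parse_duration('1:02:03')    # -> 3723.0
+#   parse_duration('2h 30m')     # -> 9000.0
+#   parse_duration('PT1H10M5S')  # -> 4205.0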
+
+
+def prepend_extension(filename, ext, expected_real_ext=None):
+ name, real_ext = os.path.splitext(filename)
+ return (
+ '{0}.{1}{2}'.format(name, ext, real_ext)
+ if not expected_real_ext or real_ext[1:] == expected_real_ext
+ else '{0}.{1}'.format(filename, ext))
+
+
+def replace_extension(filename, ext, expected_real_ext=None):
+ name, real_ext = os.path.splitext(filename)
+ return '{0}.{1}'.format(
+ name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
+ ext)
+
+
+def check_executable(exe, args=[]):
+ """ Checks if the given binary is installed somewhere in PATH, and returns its name.
+ args can be a list of arguments for a short output (like -version) """
+ try:
+ subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+ except OSError:
+ return False
+ return exe
+
+
+def get_exe_version(exe, args=['--version'],
+ version_re=None, unrecognized='present'):
+ """ Returns the version of the specified executable,
+ or False if the executable is not present """
+ try:
+ # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
+ # SIGTTOU if hypervideo is run in the background.
+ # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
+ out, _ = subprocess.Popen(
+ [encodeArgument(exe)] + args,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
+ except OSError:
+ return False
+ if isinstance(out, bytes): # Python 2.x
+ out = out.decode('ascii', 'ignore')
+ return detect_exe_version(out, version_re, unrecognized)
+
+
+def detect_exe_version(output, version_re=None, unrecognized='present'):
+ assert isinstance(output, compat_str)
+ if version_re is None:
+ version_re = r'version\s+([-0-9._a-zA-Z]+)'
+ m = re.search(version_re, output)
+ if m:
+ return m.group(1)
+ else:
+ return unrecognized
+
+
+class PagedList(object):
+ def __len__(self):
+ # This is only useful for tests
+ return len(self.getslice())
+
+
+class OnDemandPagedList(PagedList):
+ def __init__(self, pagefunc, pagesize, use_cache=True):
+ self._pagefunc = pagefunc
+ self._pagesize = pagesize
+ self._use_cache = use_cache
+ if use_cache:
+ self._cache = {}
+
+ def getslice(self, start=0, end=None):
+ res = []
+ for pagenum in itertools.count(start // self._pagesize):
+ firstid = pagenum * self._pagesize
+ nextfirstid = pagenum * self._pagesize + self._pagesize
+ if start >= nextfirstid:
+ continue
+
+ page_results = None
+ if self._use_cache:
+ page_results = self._cache.get(pagenum)
+ if page_results is None:
+ page_results = list(self._pagefunc(pagenum))
+ if self._use_cache:
+ self._cache[pagenum] = page_results
+
+ startv = (
+ start % self._pagesize
+ if firstid <= start < nextfirstid
+ else 0)
+
+ endv = (
+ ((end - 1) % self._pagesize) + 1
+ if (end is not None and firstid <= end <= nextfirstid)
+ else None)
+
+ if startv != 0 or endv is not None:
+ page_results = page_results[startv:endv]
+ res.extend(page_results)
+
+            # A little optimization: if the current page is not "full", i.e. it
+            # does not contain page_size videos, we can assume that this page
+            # is the last one - there are no more ids on further pages, so
+            # there is no need to query again.
+ if len(page_results) + startv < self._pagesize:
+ break
+
+ # If we got the whole page, but the next page is not interesting,
+ # break out early as well
+ if end == nextfirstid:
+ break
+ return res
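+
+# Illustrative usage with a hypothetical page function yielding 10 items per page:
+#   pl = OnDemandPagedList(lambda n: range(n * 10, (n + 1) * 10), 10)
+#   pl.getslice(5, 15)  # -> [5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
+# Pages are fetched lazily and (by default) cached, so repeating the same
+# getslice() call does not invoke the page function again.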
+
+
+class InAdvancePagedList(PagedList):
+ def __init__(self, pagefunc, pagecount, pagesize):
+ self._pagefunc = pagefunc
+ self._pagecount = pagecount
+ self._pagesize = pagesize
+
+ def getslice(self, start=0, end=None):
+ res = []
+ start_page = start // self._pagesize
+ end_page = (
+ self._pagecount if end is None else (end // self._pagesize + 1))
+ skip_elems = start - start_page * self._pagesize
+ only_more = None if end is None else end - start
+ for pagenum in range(start_page, end_page):
+ page = list(self._pagefunc(pagenum))
+ if skip_elems:
+ page = page[skip_elems:]
+ skip_elems = None
+ if only_more is not None:
+ if len(page) < only_more:
+ only_more -= len(page)
+ else:
+ page = page[:only_more]
+ res.extend(page)
+ break
+ res.extend(page)
+ return res
+
+
+def uppercase_escape(s):
+ unicode_escape = codecs.getdecoder('unicode_escape')
+ return re.sub(
+ r'\\U[0-9a-fA-F]{8}',
+ lambda m: unicode_escape(m.group(0))[0],
+ s)
+
+
+def lowercase_escape(s):
+ unicode_escape = codecs.getdecoder('unicode_escape')
+ return re.sub(
+ r'\\u[0-9a-fA-F]{4}',
+ lambda m: unicode_escape(m.group(0))[0],
+ s)
+
+
+def escape_rfc3986(s):
+ """Escape non-ASCII characters as suggested by RFC 3986"""
+ if sys.version_info < (3, 0) and isinstance(s, compat_str):
+ s = s.encode('utf-8')
+ return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
+
+
+def escape_url(url):
+ """Escape URL as suggested by RFC 3986"""
+ url_parsed = compat_urllib_parse_urlparse(url)
+ return url_parsed._replace(
+ netloc=url_parsed.netloc.encode('idna').decode('ascii'),
+ path=escape_rfc3986(url_parsed.path),
+ params=escape_rfc3986(url_parsed.params),
+ query=escape_rfc3986(url_parsed.query),
+ fragment=escape_rfc3986(url_parsed.fragment)
+ ).geturl()
+
+
+def read_batch_urls(batch_fd):
+ def fixup(url):
+ if not isinstance(url, compat_str):
+ url = url.decode('utf-8', 'replace')
+ BOM_UTF8 = '\xef\xbb\xbf'
+ if url.startswith(BOM_UTF8):
+ url = url[len(BOM_UTF8):]
+ url = url.strip()
+ if url.startswith(('#', ';', ']')):
+ return False
+ return url
+
+ with contextlib.closing(batch_fd) as fd:
+ return [url for url in map(fixup, fd) if url]
+
+
+def urlencode_postdata(*args, **kargs):
+ return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
+
+
+def update_url_query(url, query):
+ if not query:
+ return url
+ parsed_url = compat_urlparse.urlparse(url)
+ qs = compat_parse_qs(parsed_url.query)
+ qs.update(query)
+ return compat_urlparse.urlunparse(parsed_url._replace(
+ query=compat_urllib_parse_urlencode(qs, True)))
+
+
+def update_Request(req, url=None, data=None, headers={}, query={}):
+ req_headers = req.headers.copy()
+ req_headers.update(headers)
+ req_data = data or req.data
+ req_url = update_url_query(url or req.get_full_url(), query)
+ req_get_method = req.get_method()
+ if req_get_method == 'HEAD':
+ req_type = HEADRequest
+ elif req_get_method == 'PUT':
+ req_type = PUTRequest
+ else:
+ req_type = compat_urllib_request.Request
+ new_req = req_type(
+ req_url, data=req_data, headers=req_headers,
+ origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+ if hasattr(req, 'timeout'):
+ new_req.timeout = req.timeout
+ return new_req
+
+
+def _multipart_encode_impl(data, boundary):
+ content_type = 'multipart/form-data; boundary=%s' % boundary
+
+ out = b''
+ for k, v in data.items():
+ out += b'--' + boundary.encode('ascii') + b'\r\n'
+ if isinstance(k, compat_str):
+ k = k.encode('utf-8')
+ if isinstance(v, compat_str):
+ v = v.encode('utf-8')
+ # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
+ # suggests sending UTF-8 directly. Firefox sends UTF-8, too
+ content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
+ if boundary.encode('ascii') in content:
+ raise ValueError('Boundary overlaps with data')
+ out += content
+
+ out += b'--' + boundary.encode('ascii') + b'--\r\n'
+
+ return out, content_type
+
+
+def multipart_encode(data, boundary=None):
+ '''
+ Encode a dict to RFC 7578-compliant form-data
+
+ data:
+ A dict where keys and values can be either Unicode or bytes-like
+ objects.
+    boundary:
+        If specified, it must be a Unicode object and is used as the boundary.
+        Otherwise a random boundary is generated.
+
+ Reference: https://tools.ietf.org/html/rfc7578
+ '''
+ has_specified_boundary = boundary is not None
+
+ while True:
+ if boundary is None:
+ boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
+
+ try:
+ out, content_type = _multipart_encode_impl(data, boundary)
+ break
+ except ValueError:
+ if has_specified_boundary:
+ raise
+ boundary = None
+
+ return out, content_type
+
+
+def dict_get(d, key_or_keys, default=None, skip_false_values=True):
+ if isinstance(key_or_keys, (list, tuple)):
+ for key in key_or_keys:
+ if key not in d or d[key] is None or skip_false_values and not d[key]:
+ continue
+ return d[key]
+ return default
+ return d.get(key_or_keys, default)
+
+
+def try_get(src, getter, expected_type=None):
+ if not isinstance(getter, (list, tuple)):
+ getter = [getter]
+ for get in getter:
+ try:
+ v = get(src)
+ except (AttributeError, KeyError, TypeError, IndexError):
+ pass
+ else:
+ if expected_type is None or isinstance(v, expected_type):
+ return v
+
+
+def merge_dicts(*dicts):
+ merged = {}
+ for a_dict in dicts:
+ for k, v in a_dict.items():
+ if v is None:
+ continue
+ if (k not in merged
+ or (isinstance(v, compat_str) and v
+ and isinstance(merged[k], compat_str)
+ and not merged[k])):
+ merged[k] = v
+ return merged
+
+
+def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
+ return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
+
+
+US_RATINGS = {
+ 'G': 0,
+ 'PG': 10,
+ 'PG-13': 13,
+ 'R': 16,
+ 'NC': 18,
+}
+
+
+TV_PARENTAL_GUIDELINES = {
+ 'TV-Y': 0,
+ 'TV-Y7': 7,
+ 'TV-G': 0,
+ 'TV-PG': 0,
+ 'TV-14': 14,
+ 'TV-MA': 17,
+}
+
+
+def parse_age_limit(s):
+ if type(s) == int:
+ return s if 0 <= s <= 21 else None
+ if not isinstance(s, compat_basestring):
+ return None
+ m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
+ if m:
+ return int(m.group('age'))
+ if s in US_RATINGS:
+ return US_RATINGS[s]
+ m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
+ if m:
+ return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
+ return None
+
+
+def strip_jsonp(code):
+ return re.sub(
+ r'''(?sx)^
+ (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
+ (?:\s*&&\s*(?P=func_name))?
+ \s*\(\s*(?P<callback_data>.*)\);?
+ \s*?(?://[^\n]*)*$''',
+ r'\g<callback_data>', code)
+
+
+def js_to_json(code):
+ COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
+ SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
+ INTEGER_TABLE = (
+ (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
+ (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
+ )
+
+ def fix_kv(m):
+ v = m.group(0)
+ if v in ('true', 'false', 'null'):
+ return v
+ elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
+ return ""
+
+ if v[0] in ("'", '"'):
+ v = re.sub(r'(?s)\\.|"', lambda m: {
+ '"': '\\"',
+ "\\'": "'",
+ '\\\n': '',
+ '\\x': '\\u00',
+ }.get(m.group(0), m.group(0)), v[1:-1])
+ else:
+ for regex, base in INTEGER_TABLE:
+ im = re.match(regex, v)
+ if im:
+ i = int(im.group(1), base)
+ return '"%d":' % i if v.endswith(':') else '%d' % i
+
+ return '"%s"' % v
+
+ return re.sub(r'''(?sx)
+ "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
+ '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
+ {comment}|,(?={skip}[\]}}])|
+ (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
+ \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
+ [0-9]+(?={skip}:)|
+ !+
+ '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
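+
+# Illustrative conversions (the results parse cleanly with json.loads):
+#   js_to_json("{foo: 'bar', baz: 0x10}")  # -> '{"foo": "bar", "baz": 16}'
+#   js_to_json('[1, 2,]')                  # -> '[1, 2]'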
+
+
+def qualities(quality_ids):
+ """ Get a numeric quality value out of a list of possible values """
+ def q(qid):
+ try:
+ return quality_ids.index(qid)
+ except ValueError:
+ return -1
+ return q
+
+
+DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
+
+
+def limit_length(s, length):
+ """ Add ellipses to overly long strings """
+ if s is None:
+ return None
+ ELLIPSES = '...'
+ if len(s) > length:
+ return s[:length - len(ELLIPSES)] + ELLIPSES
+ return s
+
+
+def version_tuple(v):
+ return tuple(int(e) for e in re.split(r'[-.]', v))
+
+
+def is_outdated_version(version, limit, assume_new=True):
+ if not version:
+ return not assume_new
+ try:
+ return version_tuple(version) < version_tuple(limit)
+ except ValueError:
+ return not assume_new
+
+
+def ytdl_is_updateable():
+ """ Returns if hypervideo can be updated with -U """
+ from zipimport import zipimporter
+
+ return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
+
+
+def args_to_str(args):
+ # Get a short string representation for a subprocess command
+ return ' '.join(compat_shlex_quote(a) for a in args)
+
+
+def error_to_compat_str(err):
+ err_str = str(err)
+    # On Python 2, the error byte string must be decoded with the proper
+    # encoding rather than ASCII
+ if sys.version_info[0] < 3:
+ err_str = err_str.decode(preferredencoding())
+ return err_str
+
+
+def mimetype2ext(mt):
+ if mt is None:
+ return None
+
+ ext = {
+ 'audio/mp4': 'm4a',
+        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. We use .mp3
+        # here as it's the most popular one
+ 'audio/mpeg': 'mp3',
+ }.get(mt)
+ if ext is not None:
+ return ext
+
+ _, _, res = mt.rpartition('/')
+ res = res.split(';')[0].strip().lower()
+
+ return {
+ '3gpp': '3gp',
+ 'smptett+xml': 'tt',
+ 'ttaf+xml': 'dfxp',
+ 'ttml+xml': 'ttml',
+ 'x-flv': 'flv',
+ 'x-mp4-fragmented': 'mp4',
+ 'x-ms-sami': 'sami',
+ 'x-ms-wmv': 'wmv',
+ 'mpegurl': 'm3u8',
+ 'x-mpegurl': 'm3u8',
+ 'vnd.apple.mpegurl': 'm3u8',
+ 'dash+xml': 'mpd',
+ 'f4m+xml': 'f4m',
+ 'hds+xml': 'f4m',
+ 'vnd.ms-sstr+xml': 'ism',
+ 'quicktime': 'mov',
+ 'mp2t': 'ts',
+ 'x-wav': 'wav',
+ }.get(res, res)
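+
+# Illustrative values:
+#   mimetype2ext('application/x-mpegurl')        # -> 'm3u8'
+#   mimetype2ext('video/mp4; codecs="avc1.4d"')  # -> 'mp4'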
+
+
+def parse_codecs(codecs_str):
+ # http://tools.ietf.org/html/rfc6381
+ if not codecs_str:
+ return {}
+ split_codecs = list(filter(None, map(
+ lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
+ vcodec, acodec = None, None
+ for full_codec in split_codecs:
+ codec = full_codec.split('.')[0]
+ if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
+ if not vcodec:
+ vcodec = full_codec
+ elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
+ if not acodec:
+ acodec = full_codec
+ else:
+ write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
+ if not vcodec and not acodec:
+ if len(split_codecs) == 2:
+ return {
+ 'vcodec': split_codecs[0],
+ 'acodec': split_codecs[1],
+ }
+ else:
+ return {
+ 'vcodec': vcodec or 'none',
+ 'acodec': acodec or 'none',
+ }
+ return {}
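+
+# Illustrative values for typical RFC 6381 codecs strings:
+#   parse_codecs('avc1.64001f, mp4a.40.2')
+#   # -> {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2'}
+#   parse_codecs('opus')  # -> {'vcodec': 'none', 'acodec': 'opus'}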
+
+
+def urlhandle_detect_ext(url_handle):
+ getheader = url_handle.headers.get
+
+ cd = getheader('Content-Disposition')
+ if cd:
+ m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
+ if m:
+ e = determine_ext(m.group('filename'), default_ext=None)
+ if e:
+ return e
+
+ return mimetype2ext(getheader('Content-Type'))
+
+
+def encode_data_uri(data, mime_type):
+ return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
+
+
+def age_restricted(content_limit, age_limit):
+ """ Returns True iff the content should be blocked """
+
+ if age_limit is None: # No limit set
+ return False
+ if content_limit is None:
+ return False # Content available for everyone
+ return age_limit < content_limit
+
+
+def is_html(first_bytes):
+ """ Detect whether a file contains HTML by examining its first bytes. """
+
+ BOMS = [
+ (b'\xef\xbb\xbf', 'utf-8'),
+ (b'\x00\x00\xfe\xff', 'utf-32-be'),
+ (b'\xff\xfe\x00\x00', 'utf-32-le'),
+ (b'\xff\xfe', 'utf-16-le'),
+ (b'\xfe\xff', 'utf-16-be'),
+ ]
+ for bom, enc in BOMS:
+ if first_bytes.startswith(bom):
+ s = first_bytes[len(bom):].decode(enc, 'replace')
+ break
+ else:
+ s = first_bytes.decode('utf-8', 'replace')
+
+ return re.match(r'^\s*<', s)
+
+
+def determine_protocol(info_dict):
+ protocol = info_dict.get('protocol')
+ if protocol is not None:
+ return protocol
+
+ url = info_dict['url']
+ if url.startswith('rtmp'):
+ return 'rtmp'
+ elif url.startswith('mms'):
+ return 'mms'
+ elif url.startswith('rtsp'):
+ return 'rtsp'
+
+ ext = determine_ext(url)
+ if ext == 'm3u8':
+ return 'm3u8'
+ elif ext == 'f4m':
+ return 'f4m'
+
+ return compat_urllib_parse_urlparse(url).scheme
+
+
+def render_table(header_row, data):
+ """ Render a list of rows, each as a list of values """
+ table = [header_row] + data
+ max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
+ format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
+ return '\n'.join(format_str % tuple(row) for row in table)
+
+
+def _match_one(filter_part, dct):
+ COMPARISON_OPERATORS = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '>': operator.gt,
+ '>=': operator.ge,
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>[a-z_]+)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?:
+ (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
+ (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
+ (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
+ )
+ \s*$
+ ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
+ m = operator_rex.search(filter_part)
+ if m:
+ op = COMPARISON_OPERATORS[m.group('op')]
+ actual_value = dct.get(m.group('key'))
+ if (m.group('quotedstrval') is not None
+ or m.group('strval') is not None
+            # If the original field is a string and the matching comparison
+            # value is a number, we should respect the origin of the original
+            # field and process the comparison value as a string (see
+            # https://github.com/ytdl-org/youtube-dl/issues/11082).
+ or actual_value is not None and m.group('intval') is not None
+ and isinstance(actual_value, compat_str)):
+ if m.group('op') not in ('=', '!='):
+ raise ValueError(
+ 'Operator %s does not support string values!' % m.group('op'))
+ comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
+ quote = m.group('quote')
+ if quote is not None:
+ comparison_value = comparison_value.replace(r'\%s' % quote, quote)
+ else:
+ try:
+ comparison_value = int(m.group('intval'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('intval'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('intval') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid integer value %r in filter part %r' % (
+ m.group('intval'), filter_part))
+ if actual_value is None:
+ return m.group('none_inclusive')
+ return op(actual_value, comparison_value)
+
+ UNARY_OPERATORS = {
+ '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
+ '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<op>%s)\s*(?P<key>[a-z_]+)
+ \s*$
+ ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
+ m = operator_rex.search(filter_part)
+ if m:
+ op = UNARY_OPERATORS[m.group('op')]
+ actual_value = dct.get(m.group('key'))
+ return op(actual_value)
+
+ raise ValueError('Invalid filter part %r' % filter_part)
+
+
+def match_str(filter_str, dct):
+ """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
+
+ return all(
+ _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
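+
+# Illustrative filters (a '?' after the operator makes the comparison pass
+# when the field is missing):
+#   match_str('duration > 60 & !is_live', {'duration': 90, 'is_live': False})
+#   # -> True
+#   match_str('like_count >? 100', {})  # -> True (field absent, '?' given)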
+
+
+def match_filter_func(filter_str):
+ def _match_func(info_dict):
+ if match_str(filter_str, info_dict):
+ return None
+ else:
+ video_title = info_dict.get('title', info_dict.get('id', 'video'))
+ return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
+ return _match_func
+
+
+def parse_dfxp_time_expr(time_expr):
+ if not time_expr:
+ return
+
+ mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
+ if mobj:
+ return float(mobj.group('time_offset'))
+
+ mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
+ if mobj:
+ return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
+
+
+def srt_subtitles_timecode(seconds):
+ return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
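+
+# Illustrative value:
+#   srt_subtitles_timecode(3661.5)  # -> '01:01:01,500'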
+
+
+def dfxp2srt(dfxp_data):
+ '''
+ @param dfxp_data A bytes-like object containing DFXP data
+ @returns A unicode object containing converted SRT data
+ '''
+ LEGACY_NAMESPACES = (
+ (b'http://www.w3.org/ns/ttml', [
+ b'http://www.w3.org/2004/11/ttaf1',
+ b'http://www.w3.org/2006/04/ttaf1',
+ b'http://www.w3.org/2006/10/ttaf1',
+ ]),
+ (b'http://www.w3.org/ns/ttml#styling', [
+ b'http://www.w3.org/ns/ttml#style',
+ ]),
+ )
+
+ SUPPORTED_STYLING = [
+ 'color',
+ 'fontFamily',
+ 'fontSize',
+ 'fontStyle',
+ 'fontWeight',
+ 'textDecoration'
+ ]
+
+ _x = functools.partial(xpath_with_ns, ns_map={
+ 'xml': 'http://www.w3.org/XML/1998/namespace',
+ 'ttml': 'http://www.w3.org/ns/ttml',
+ 'tts': 'http://www.w3.org/ns/ttml#styling',
+ })
+
+ styles = {}
+ default_style = {}
+
+ class TTMLPElementParser(object):
+ _out = ''
+ _unclosed_elements = []
+ _applied_styles = []
+
+ def start(self, tag, attrib):
+ if tag in (_x('ttml:br'), 'br'):
+ self._out += '\n'
+ else:
+ unclosed_elements = []
+ style = {}
+ element_style_id = attrib.get('style')
+ if default_style:
+ style.update(default_style)
+ if element_style_id:
+ style.update(styles.get(element_style_id, {}))
+ for prop in SUPPORTED_STYLING:
+ prop_val = attrib.get(_x('tts:' + prop))
+ if prop_val:
+ style[prop] = prop_val
+ if style:
+ font = ''
+ for k, v in sorted(style.items()):
+ if self._applied_styles and self._applied_styles[-1].get(k) == v:
+ continue
+ if k == 'color':
+ font += ' color="%s"' % v
+ elif k == 'fontSize':
+ font += ' size="%s"' % v
+ elif k == 'fontFamily':
+ font += ' face="%s"' % v
+ elif k == 'fontWeight' and v == 'bold':
+ self._out += '<b>'
+ unclosed_elements.append('b')
+ elif k == 'fontStyle' and v == 'italic':
+ self._out += '<i>'
+ unclosed_elements.append('i')
+ elif k == 'textDecoration' and v == 'underline':
+ self._out += '<u>'
+ unclosed_elements.append('u')
+ if font:
+ self._out += '<font' + font + '>'
+ unclosed_elements.append('font')
+ applied_style = {}
+ if self._applied_styles:
+ applied_style.update(self._applied_styles[-1])
+ applied_style.update(style)
+ self._applied_styles.append(applied_style)
+ self._unclosed_elements.append(unclosed_elements)
+
+ def end(self, tag):
+ if tag not in (_x('ttml:br'), 'br'):
+ unclosed_elements = self._unclosed_elements.pop()
+ for element in reversed(unclosed_elements):
+ self._out += '</%s>' % element
+ if unclosed_elements and self._applied_styles:
+ self._applied_styles.pop()
+
+ def data(self, data):
+ self._out += data
+
+ def close(self):
+ return self._out.strip()
+
+ def parse_node(node):
+ target = TTMLPElementParser()
+ parser = xml.etree.ElementTree.XMLParser(target=target)
+ parser.feed(xml.etree.ElementTree.tostring(node))
+ return parser.close()
+
+ for k, v in LEGACY_NAMESPACES:
+ for ns in v:
+ dfxp_data = dfxp_data.replace(ns, k)
+
+ dfxp = compat_etree_fromstring(dfxp_data)
+ out = []
+ paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
+
+ if not paras:
+ raise ValueError('Invalid dfxp/TTML subtitle')
+
+ repeat = False
+ while True:
+ for style in dfxp.findall(_x('.//ttml:style')):
+ style_id = style.get('id') or style.get(_x('xml:id'))
+ if not style_id:
+ continue
+ parent_style_id = style.get('style')
+ if parent_style_id:
+ if parent_style_id not in styles:
+ repeat = True
+ continue
+ styles[style_id] = styles[parent_style_id].copy()
+ for prop in SUPPORTED_STYLING:
+ prop_val = style.get(_x('tts:' + prop))
+ if prop_val:
+ styles.setdefault(style_id, {})[prop] = prop_val
+ if repeat:
+ repeat = False
+ else:
+ break
+
+ for p in ('body', 'div'):
+ ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
+ if ele is None:
+ continue
+ style = styles.get(ele.get('style'))
+ if not style:
+ continue
+ default_style.update(style)
+
+ for para, index in zip(paras, itertools.count(1)):
+ begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
+ end_time = parse_dfxp_time_expr(para.attrib.get('end'))
+ dur = parse_dfxp_time_expr(para.attrib.get('dur'))
+ if begin_time is None:
+ continue
+ if not end_time:
+ if not dur:
+ continue
+ end_time = begin_time + dur
+ out.append('%d\n%s --> %s\n%s\n\n' % (
+ index,
+ srt_subtitles_timecode(begin_time),
+ srt_subtitles_timecode(end_time),
+ parse_node(para)))
+
+ return ''.join(out)
+
+
+def cli_option(params, command_option, param):
+ param = params.get(param)
+ if param:
+ param = compat_str(param)
+ return [command_option, param] if param is not None else []
+
+
+def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
+ param = params.get(param)
+ if param is None:
+ return []
+ assert isinstance(param, bool)
+ if separator:
+ return [command_option + separator + (true_value if param else false_value)]
+ return [command_option, true_value if param else false_value]
+
+
+def cli_valueless_option(params, command_option, param, expected_value=True):
+ param = params.get(param)
+ return [command_option] if param == expected_value else []
+
+
+def cli_configuration_args(params, param, default=[]):
+ ex_args = params.get(param)
+ if ex_args is None:
+ return default
+ assert isinstance(ex_args, list)
+ return ex_args
+
+
+class ISO639Utils(object):
+ # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
+ _lang_map = {
+ 'aa': 'aar',
+ 'ab': 'abk',
+ 'ae': 'ave',
+ 'af': 'afr',
+ 'ak': 'aka',
+ 'am': 'amh',
+ 'an': 'arg',
+ 'ar': 'ara',
+ 'as': 'asm',
+ 'av': 'ava',
+ 'ay': 'aym',
+ 'az': 'aze',
+ 'ba': 'bak',
+ 'be': 'bel',
+ 'bg': 'bul',
+ 'bh': 'bih',
+ 'bi': 'bis',
+ 'bm': 'bam',
+ 'bn': 'ben',
+ 'bo': 'bod',
+ 'br': 'bre',
+ 'bs': 'bos',
+ 'ca': 'cat',
+ 'ce': 'che',
+ 'ch': 'cha',
+ 'co': 'cos',
+ 'cr': 'cre',
+ 'cs': 'ces',
+ 'cu': 'chu',
+ 'cv': 'chv',
+ 'cy': 'cym',
+ 'da': 'dan',
+ 'de': 'deu',
+ 'dv': 'div',
+ 'dz': 'dzo',
+ 'ee': 'ewe',
+ 'el': 'ell',
+ 'en': 'eng',
+ 'eo': 'epo',
+ 'es': 'spa',
+ 'et': 'est',
+ 'eu': 'eus',
+ 'fa': 'fas',
+ 'ff': 'ful',
+ 'fi': 'fin',
+ 'fj': 'fij',
+ 'fo': 'fao',
+ 'fr': 'fra',
+ 'fy': 'fry',
+ 'ga': 'gle',
+ 'gd': 'gla',
+ 'gl': 'glg',
+ 'gn': 'grn',
+ 'gu': 'guj',
+ 'gv': 'glv',
+ 'ha': 'hau',
+ 'he': 'heb',
+ 'iw': 'heb', # Replaced by he in 1989 revision
+ 'hi': 'hin',
+ 'ho': 'hmo',
+ 'hr': 'hrv',
+ 'ht': 'hat',
+ 'hu': 'hun',
+ 'hy': 'hye',
+ 'hz': 'her',
+ 'ia': 'ina',
+ 'id': 'ind',
+ 'in': 'ind', # Replaced by id in 1989 revision
+ 'ie': 'ile',
+ 'ig': 'ibo',
+ 'ii': 'iii',
+ 'ik': 'ipk',
+ 'io': 'ido',
+ 'is': 'isl',
+ 'it': 'ita',
+ 'iu': 'iku',
+ 'ja': 'jpn',
+ 'jv': 'jav',
+ 'ka': 'kat',
+ 'kg': 'kon',
+ 'ki': 'kik',
+ 'kj': 'kua',
+ 'kk': 'kaz',
+ 'kl': 'kal',
+ 'km': 'khm',
+ 'kn': 'kan',
+ 'ko': 'kor',
+ 'kr': 'kau',
+ 'ks': 'kas',
+ 'ku': 'kur',
+ 'kv': 'kom',
+ 'kw': 'cor',
+ 'ky': 'kir',
+ 'la': 'lat',
+ 'lb': 'ltz',
+ 'lg': 'lug',
+ 'li': 'lim',
+ 'ln': 'lin',
+ 'lo': 'lao',
+ 'lt': 'lit',
+ 'lu': 'lub',
+ 'lv': 'lav',
+ 'mg': 'mlg',
+ 'mh': 'mah',
+ 'mi': 'mri',
+ 'mk': 'mkd',
+ 'ml': 'mal',
+ 'mn': 'mon',
+ 'mr': 'mar',
+ 'ms': 'msa',
+ 'mt': 'mlt',
+ 'my': 'mya',
+ 'na': 'nau',
+ 'nb': 'nob',
+ 'nd': 'nde',
+ 'ne': 'nep',
+ 'ng': 'ndo',
+ 'nl': 'nld',
+ 'nn': 'nno',
+ 'no': 'nor',
+ 'nr': 'nbl',
+ 'nv': 'nav',
+ 'ny': 'nya',
+ 'oc': 'oci',
+ 'oj': 'oji',
+ 'om': 'orm',
+ 'or': 'ori',
+ 'os': 'oss',
+ 'pa': 'pan',
+ 'pi': 'pli',
+ 'pl': 'pol',
+ 'ps': 'pus',
+ 'pt': 'por',
+ 'qu': 'que',
+ 'rm': 'roh',
+ 'rn': 'run',
+ 'ro': 'ron',
+ 'ru': 'rus',
+ 'rw': 'kin',
+ 'sa': 'san',
+ 'sc': 'srd',
+ 'sd': 'snd',
+ 'se': 'sme',
+ 'sg': 'sag',
+ 'si': 'sin',
+ 'sk': 'slk',
+ 'sl': 'slv',
+ 'sm': 'smo',
+ 'sn': 'sna',
+ 'so': 'som',
+ 'sq': 'sqi',
+ 'sr': 'srp',
+ 'ss': 'ssw',
+ 'st': 'sot',
+ 'su': 'sun',
+ 'sv': 'swe',
+ 'sw': 'swa',
+ 'ta': 'tam',
+ 'te': 'tel',
+ 'tg': 'tgk',
+ 'th': 'tha',
+ 'ti': 'tir',
+ 'tk': 'tuk',
+ 'tl': 'tgl',
+ 'tn': 'tsn',
+ 'to': 'ton',
+ 'tr': 'tur',
+ 'ts': 'tso',
+ 'tt': 'tat',
+ 'tw': 'twi',
+ 'ty': 'tah',
+ 'ug': 'uig',
+ 'uk': 'ukr',
+ 'ur': 'urd',
+ 'uz': 'uzb',
+ 've': 'ven',
+ 'vi': 'vie',
+ 'vo': 'vol',
+ 'wa': 'wln',
+ 'wo': 'wol',
+ 'xh': 'xho',
+ 'yi': 'yid',
+ 'ji': 'yid', # Replaced by yi in 1989 revision
+ 'yo': 'yor',
+ 'za': 'zha',
+ 'zh': 'zho',
+ 'zu': 'zul',
+ }
+
+ @classmethod
+ def short2long(cls, code):
+ """Convert language code from ISO 639-1 to ISO 639-2/T"""
+ return cls._lang_map.get(code[:2])
+
+ @classmethod
+ def long2short(cls, code):
+ """Convert language code from ISO 639-2/T to ISO 639-1"""
+ for short_name, long_name in cls._lang_map.items():
+ if long_name == code:
+ return short_name
+
+
+class ISO3166Utils(object):
+ # From http://data.okfn.org/data/core/country-list
+ _country_map = {
+ 'AF': 'Afghanistan',
+ 'AX': 'Åland Islands',
+ 'AL': 'Albania',
+ 'DZ': 'Algeria',
+ 'AS': 'American Samoa',
+ 'AD': 'Andorra',
+ 'AO': 'Angola',
+ 'AI': 'Anguilla',
+ 'AQ': 'Antarctica',
+ 'AG': 'Antigua and Barbuda',
+ 'AR': 'Argentina',
+ 'AM': 'Armenia',
+ 'AW': 'Aruba',
+ 'AU': 'Australia',
+ 'AT': 'Austria',
+ 'AZ': 'Azerbaijan',
+ 'BS': 'Bahamas',
+ 'BH': 'Bahrain',
+ 'BD': 'Bangladesh',
+ 'BB': 'Barbados',
+ 'BY': 'Belarus',
+ 'BE': 'Belgium',
+ 'BZ': 'Belize',
+ 'BJ': 'Benin',
+ 'BM': 'Bermuda',
+ 'BT': 'Bhutan',
+ 'BO': 'Bolivia, Plurinational State of',
+ 'BQ': 'Bonaire, Sint Eustatius and Saba',
+ 'BA': 'Bosnia and Herzegovina',
+ 'BW': 'Botswana',
+ 'BV': 'Bouvet Island',
+ 'BR': 'Brazil',
+ 'IO': 'British Indian Ocean Territory',
+ 'BN': 'Brunei Darussalam',
+ 'BG': 'Bulgaria',
+ 'BF': 'Burkina Faso',
+ 'BI': 'Burundi',
+ 'KH': 'Cambodia',
+ 'CM': 'Cameroon',
+ 'CA': 'Canada',
+ 'CV': 'Cape Verde',
+ 'KY': 'Cayman Islands',
+ 'CF': 'Central African Republic',
+ 'TD': 'Chad',
+ 'CL': 'Chile',
+ 'CN': 'China',
+ 'CX': 'Christmas Island',
+ 'CC': 'Cocos (Keeling) Islands',
+ 'CO': 'Colombia',
+ 'KM': 'Comoros',
+ 'CG': 'Congo',
+ 'CD': 'Congo, the Democratic Republic of the',
+ 'CK': 'Cook Islands',
+ 'CR': 'Costa Rica',
+ 'CI': 'Côte d\'Ivoire',
+ 'HR': 'Croatia',
+ 'CU': 'Cuba',
+ 'CW': 'Curaçao',
+ 'CY': 'Cyprus',
+ 'CZ': 'Czech Republic',
+ 'DK': 'Denmark',
+ 'DJ': 'Djibouti',
+ 'DM': 'Dominica',
+ 'DO': 'Dominican Republic',
+ 'EC': 'Ecuador',
+ 'EG': 'Egypt',
+ 'SV': 'El Salvador',
+ 'GQ': 'Equatorial Guinea',
+ 'ER': 'Eritrea',
+ 'EE': 'Estonia',
+ 'ET': 'Ethiopia',
+ 'FK': 'Falkland Islands (Malvinas)',
+ 'FO': 'Faroe Islands',
+ 'FJ': 'Fiji',
+ 'FI': 'Finland',
+ 'FR': 'France',
+ 'GF': 'French Guiana',
+ 'PF': 'French Polynesia',
+ 'TF': 'French Southern Territories',
+ 'GA': 'Gabon',
+ 'GM': 'Gambia',
+ 'GE': 'Georgia',
+ 'DE': 'Germany',
+ 'GH': 'Ghana',
+ 'GI': 'Gibraltar',
+ 'GR': 'Greece',
+ 'GL': 'Greenland',
+ 'GD': 'Grenada',
+ 'GP': 'Guadeloupe',
+ 'GU': 'Guam',
+ 'GT': 'Guatemala',
+ 'GG': 'Guernsey',
+ 'GN': 'Guinea',
+ 'GW': 'Guinea-Bissau',
+ 'GY': 'Guyana',
+ 'HT': 'Haiti',
+ 'HM': 'Heard Island and McDonald Islands',
+ 'VA': 'Holy See (Vatican City State)',
+ 'HN': 'Honduras',
+ 'HK': 'Hong Kong',
+ 'HU': 'Hungary',
+ 'IS': 'Iceland',
+ 'IN': 'India',
+ 'ID': 'Indonesia',
+ 'IR': 'Iran, Islamic Republic of',
+ 'IQ': 'Iraq',
+ 'IE': 'Ireland',
+ 'IM': 'Isle of Man',
+ 'IL': 'Israel',
+ 'IT': 'Italy',
+ 'JM': 'Jamaica',
+ 'JP': 'Japan',
+ 'JE': 'Jersey',
+ 'JO': 'Jordan',
+ 'KZ': 'Kazakhstan',
+ 'KE': 'Kenya',
+ 'KI': 'Kiribati',
+ 'KP': 'Korea, Democratic People\'s Republic of',
+ 'KR': 'Korea, Republic of',
+ 'KW': 'Kuwait',
+ 'KG': 'Kyrgyzstan',
+ 'LA': 'Lao People\'s Democratic Republic',
+ 'LV': 'Latvia',
+ 'LB': 'Lebanon',
+ 'LS': 'Lesotho',
+ 'LR': 'Liberia',
+ 'LY': 'Libya',
+ 'LI': 'Liechtenstein',
+ 'LT': 'Lithuania',
+ 'LU': 'Luxembourg',
+ 'MO': 'Macao',
+ 'MK': 'Macedonia, the Former Yugoslav Republic of',
+ 'MG': 'Madagascar',
+ 'MW': 'Malawi',
+ 'MY': 'Malaysia',
+ 'MV': 'Maldives',
+ 'ML': 'Mali',
+ 'MT': 'Malta',
+ 'MH': 'Marshall Islands',
+ 'MQ': 'Martinique',
+ 'MR': 'Mauritania',
+ 'MU': 'Mauritius',
+ 'YT': 'Mayotte',
+ 'MX': 'Mexico',
+ 'FM': 'Micronesia, Federated States of',
+ 'MD': 'Moldova, Republic of',
+ 'MC': 'Monaco',
+ 'MN': 'Mongolia',
+ 'ME': 'Montenegro',
+ 'MS': 'Montserrat',
+ 'MA': 'Morocco',
+ 'MZ': 'Mozambique',
+ 'MM': 'Myanmar',
+ 'NA': 'Namibia',
+ 'NR': 'Nauru',
+ 'NP': 'Nepal',
+ 'NL': 'Netherlands',
+ 'NC': 'New Caledonia',
+ 'NZ': 'New Zealand',
+ 'NI': 'Nicaragua',
+ 'NE': 'Niger',
+ 'NG': 'Nigeria',
+ 'NU': 'Niue',
+ 'NF': 'Norfolk Island',
+ 'MP': 'Northern Mariana Islands',
+ 'NO': 'Norway',
+ 'OM': 'Oman',
+ 'PK': 'Pakistan',
+ 'PW': 'Palau',
+ 'PS': 'Palestine, State of',
+ 'PA': 'Panama',
+ 'PG': 'Papua New Guinea',
+ 'PY': 'Paraguay',
+ 'PE': 'Peru',
+ 'PH': 'Philippines',
+ 'PN': 'Pitcairn',
+ 'PL': 'Poland',
+ 'PT': 'Portugal',
+ 'PR': 'Puerto Rico',
+ 'QA': 'Qatar',
+ 'RE': 'Réunion',
+ 'RO': 'Romania',
+ 'RU': 'Russian Federation',
+ 'RW': 'Rwanda',
+ 'BL': 'Saint Barthélemy',
+ 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
+ 'KN': 'Saint Kitts and Nevis',
+ 'LC': 'Saint Lucia',
+ 'MF': 'Saint Martin (French part)',
+ 'PM': 'Saint Pierre and Miquelon',
+ 'VC': 'Saint Vincent and the Grenadines',
+ 'WS': 'Samoa',
+ 'SM': 'San Marino',
+ 'ST': 'Sao Tome and Principe',
+ 'SA': 'Saudi Arabia',
+ 'SN': 'Senegal',
+ 'RS': 'Serbia',
+ 'SC': 'Seychelles',
+ 'SL': 'Sierra Leone',
+ 'SG': 'Singapore',
+ 'SX': 'Sint Maarten (Dutch part)',
+ 'SK': 'Slovakia',
+ 'SI': 'Slovenia',
+ 'SB': 'Solomon Islands',
+ 'SO': 'Somalia',
+ 'ZA': 'South Africa',
+ 'GS': 'South Georgia and the South Sandwich Islands',
+ 'SS': 'South Sudan',
+ 'ES': 'Spain',
+ 'LK': 'Sri Lanka',
+ 'SD': 'Sudan',
+ 'SR': 'Suriname',
+ 'SJ': 'Svalbard and Jan Mayen',
+ 'SZ': 'Swaziland',
+ 'SE': 'Sweden',
+ 'CH': 'Switzerland',
+ 'SY': 'Syrian Arab Republic',
+ 'TW': 'Taiwan, Province of China',
+ 'TJ': 'Tajikistan',
+ 'TZ': 'Tanzania, United Republic of',
+ 'TH': 'Thailand',
+ 'TL': 'Timor-Leste',
+ 'TG': 'Togo',
+ 'TK': 'Tokelau',
+ 'TO': 'Tonga',
+ 'TT': 'Trinidad and Tobago',
+ 'TN': 'Tunisia',
+ 'TR': 'Turkey',
+ 'TM': 'Turkmenistan',
+ 'TC': 'Turks and Caicos Islands',
+ 'TV': 'Tuvalu',
+ 'UG': 'Uganda',
+ 'UA': 'Ukraine',
+ 'AE': 'United Arab Emirates',
+ 'GB': 'United Kingdom',
+ 'US': 'United States',
+ 'UM': 'United States Minor Outlying Islands',
+ 'UY': 'Uruguay',
+ 'UZ': 'Uzbekistan',
+ 'VU': 'Vanuatu',
+ 'VE': 'Venezuela, Bolivarian Republic of',
+ 'VN': 'Viet Nam',
+ 'VG': 'Virgin Islands, British',
+ 'VI': 'Virgin Islands, U.S.',
+ 'WF': 'Wallis and Futuna',
+ 'EH': 'Western Sahara',
+ 'YE': 'Yemen',
+ 'ZM': 'Zambia',
+ 'ZW': 'Zimbabwe',
+ }
+
+ @classmethod
+ def short2full(cls, code):
+ """Convert an ISO 3166-2 country code to the corresponding full name"""
+ return cls._country_map.get(code.upper())
+
+
+class GeoUtils(object):
+ # Major IPv4 address blocks per country
+ _country_ip_map = {
+ 'AD': '46.172.224.0/19',
+ 'AE': '94.200.0.0/13',
+ 'AF': '149.54.0.0/17',
+ 'AG': '209.59.64.0/18',
+ 'AI': '204.14.248.0/21',
+ 'AL': '46.99.0.0/16',
+ 'AM': '46.70.0.0/15',
+ 'AO': '105.168.0.0/13',
+ 'AP': '182.50.184.0/21',
+ 'AQ': '23.154.160.0/24',
+ 'AR': '181.0.0.0/12',
+ 'AS': '202.70.112.0/20',
+ 'AT': '77.116.0.0/14',
+ 'AU': '1.128.0.0/11',
+ 'AW': '181.41.0.0/18',
+ 'AX': '185.217.4.0/22',
+ 'AZ': '5.197.0.0/16',
+ 'BA': '31.176.128.0/17',
+ 'BB': '65.48.128.0/17',
+ 'BD': '114.130.0.0/16',
+ 'BE': '57.0.0.0/8',
+ 'BF': '102.178.0.0/15',
+ 'BG': '95.42.0.0/15',
+ 'BH': '37.131.0.0/17',
+ 'BI': '154.117.192.0/18',
+ 'BJ': '137.255.0.0/16',
+ 'BL': '185.212.72.0/23',
+ 'BM': '196.12.64.0/18',
+ 'BN': '156.31.0.0/16',
+ 'BO': '161.56.0.0/16',
+ 'BQ': '161.0.80.0/20',
+ 'BR': '191.128.0.0/12',
+ 'BS': '24.51.64.0/18',
+ 'BT': '119.2.96.0/19',
+ 'BW': '168.167.0.0/16',
+ 'BY': '178.120.0.0/13',
+ 'BZ': '179.42.192.0/18',
+ 'CA': '99.224.0.0/11',
+ 'CD': '41.243.0.0/16',
+ 'CF': '197.242.176.0/21',
+ 'CG': '160.113.0.0/16',
+ 'CH': '85.0.0.0/13',
+ 'CI': '102.136.0.0/14',
+ 'CK': '202.65.32.0/19',
+ 'CL': '152.172.0.0/14',
+ 'CM': '102.244.0.0/14',
+ 'CN': '36.128.0.0/10',
+ 'CO': '181.240.0.0/12',
+ 'CR': '201.192.0.0/12',
+ 'CU': '152.206.0.0/15',
+ 'CV': '165.90.96.0/19',
+ 'CW': '190.88.128.0/17',
+ 'CY': '31.153.0.0/16',
+ 'CZ': '88.100.0.0/14',
+ 'DE': '53.0.0.0/8',
+ 'DJ': '197.241.0.0/17',
+ 'DK': '87.48.0.0/12',
+ 'DM': '192.243.48.0/20',
+ 'DO': '152.166.0.0/15',
+ 'DZ': '41.96.0.0/12',
+ 'EC': '186.68.0.0/15',
+ 'EE': '90.190.0.0/15',
+ 'EG': '156.160.0.0/11',
+ 'ER': '196.200.96.0/20',
+ 'ES': '88.0.0.0/11',
+ 'ET': '196.188.0.0/14',
+ 'EU': '2.16.0.0/13',
+ 'FI': '91.152.0.0/13',
+ 'FJ': '144.120.0.0/16',
+ 'FK': '80.73.208.0/21',
+ 'FM': '119.252.112.0/20',
+ 'FO': '88.85.32.0/19',
+ 'FR': '90.0.0.0/9',
+ 'GA': '41.158.0.0/15',
+ 'GB': '25.0.0.0/8',
+ 'GD': '74.122.88.0/21',
+ 'GE': '31.146.0.0/16',
+ 'GF': '161.22.64.0/18',
+ 'GG': '62.68.160.0/19',
+ 'GH': '154.160.0.0/12',
+ 'GI': '95.164.0.0/16',
+ 'GL': '88.83.0.0/19',
+ 'GM': '160.182.0.0/15',
+ 'GN': '197.149.192.0/18',
+ 'GP': '104.250.0.0/19',
+ 'GQ': '105.235.224.0/20',
+ 'GR': '94.64.0.0/13',
+ 'GT': '168.234.0.0/16',
+ 'GU': '168.123.0.0/16',
+ 'GW': '197.214.80.0/20',
+ 'GY': '181.41.64.0/18',
+ 'HK': '113.252.0.0/14',
+ 'HN': '181.210.0.0/16',
+ 'HR': '93.136.0.0/13',
+ 'HT': '148.102.128.0/17',
+ 'HU': '84.0.0.0/14',
+ 'ID': '39.192.0.0/10',
+ 'IE': '87.32.0.0/12',
+ 'IL': '79.176.0.0/13',
+ 'IM': '5.62.80.0/20',
+ 'IN': '117.192.0.0/10',
+ 'IO': '203.83.48.0/21',
+ 'IQ': '37.236.0.0/14',
+ 'IR': '2.176.0.0/12',
+ 'IS': '82.221.0.0/16',
+ 'IT': '79.0.0.0/10',
+ 'JE': '87.244.64.0/18',
+ 'JM': '72.27.0.0/17',
+ 'JO': '176.29.0.0/16',
+ 'JP': '133.0.0.0/8',
+ 'KE': '105.48.0.0/12',
+ 'KG': '158.181.128.0/17',
+ 'KH': '36.37.128.0/17',
+ 'KI': '103.25.140.0/22',
+ 'KM': '197.255.224.0/20',
+ 'KN': '198.167.192.0/19',
+ 'KP': '175.45.176.0/22',
+ 'KR': '175.192.0.0/10',
+ 'KW': '37.36.0.0/14',
+ 'KY': '64.96.0.0/15',
+ 'KZ': '2.72.0.0/13',
+ 'LA': '115.84.64.0/18',
+ 'LB': '178.135.0.0/16',
+ 'LC': '24.92.144.0/20',
+ 'LI': '82.117.0.0/19',
+ 'LK': '112.134.0.0/15',
+ 'LR': '102.183.0.0/16',
+ 'LS': '129.232.0.0/17',
+ 'LT': '78.56.0.0/13',
+ 'LU': '188.42.0.0/16',
+ 'LV': '46.109.0.0/16',
+ 'LY': '41.252.0.0/14',
+ 'MA': '105.128.0.0/11',
+ 'MC': '88.209.64.0/18',
+ 'MD': '37.246.0.0/16',
+ 'ME': '178.175.0.0/17',
+ 'MF': '74.112.232.0/21',
+ 'MG': '154.126.0.0/17',
+ 'MH': '117.103.88.0/21',
+ 'MK': '77.28.0.0/15',
+ 'ML': '154.118.128.0/18',
+ 'MM': '37.111.0.0/17',
+ 'MN': '49.0.128.0/17',
+ 'MO': '60.246.0.0/16',
+ 'MP': '202.88.64.0/20',
+ 'MQ': '109.203.224.0/19',
+ 'MR': '41.188.64.0/18',
+ 'MS': '208.90.112.0/22',
+ 'MT': '46.11.0.0/16',
+ 'MU': '105.16.0.0/12',
+ 'MV': '27.114.128.0/18',
+ 'MW': '102.70.0.0/15',
+ 'MX': '187.192.0.0/11',
+ 'MY': '175.136.0.0/13',
+ 'MZ': '197.218.0.0/15',
+ 'NA': '41.182.0.0/16',
+ 'NC': '101.101.0.0/18',
+ 'NE': '197.214.0.0/18',
+ 'NF': '203.17.240.0/22',
+ 'NG': '105.112.0.0/12',
+ 'NI': '186.76.0.0/15',
+ 'NL': '145.96.0.0/11',
+ 'NO': '84.208.0.0/13',
+ 'NP': '36.252.0.0/15',
+ 'NR': '203.98.224.0/19',
+ 'NU': '49.156.48.0/22',
+ 'NZ': '49.224.0.0/14',
+ 'OM': '5.36.0.0/15',
+ 'PA': '186.72.0.0/15',
+ 'PE': '186.160.0.0/14',
+ 'PF': '123.50.64.0/18',
+ 'PG': '124.240.192.0/19',
+ 'PH': '49.144.0.0/13',
+ 'PK': '39.32.0.0/11',
+ 'PL': '83.0.0.0/11',
+ 'PM': '70.36.0.0/20',
+ 'PR': '66.50.0.0/16',
+ 'PS': '188.161.0.0/16',
+ 'PT': '85.240.0.0/13',
+ 'PW': '202.124.224.0/20',
+ 'PY': '181.120.0.0/14',
+ 'QA': '37.210.0.0/15',
+ 'RE': '102.35.0.0/16',
+ 'RO': '79.112.0.0/13',
+ 'RS': '93.86.0.0/15',
+ 'RU': '5.136.0.0/13',
+ 'RW': '41.186.0.0/16',
+ 'SA': '188.48.0.0/13',
+ 'SB': '202.1.160.0/19',
+ 'SC': '154.192.0.0/11',
+ 'SD': '102.120.0.0/13',
+ 'SE': '78.64.0.0/12',
+ 'SG': '8.128.0.0/10',
+ 'SI': '188.196.0.0/14',
+ 'SK': '78.98.0.0/15',
+ 'SL': '102.143.0.0/17',
+ 'SM': '89.186.32.0/19',
+ 'SN': '41.82.0.0/15',
+ 'SO': '154.115.192.0/18',
+ 'SR': '186.179.128.0/17',
+ 'SS': '105.235.208.0/21',
+ 'ST': '197.159.160.0/19',
+ 'SV': '168.243.0.0/16',
+ 'SX': '190.102.0.0/20',
+ 'SY': '5.0.0.0/16',
+ 'SZ': '41.84.224.0/19',
+ 'TC': '65.255.48.0/20',
+ 'TD': '154.68.128.0/19',
+ 'TG': '196.168.0.0/14',
+ 'TH': '171.96.0.0/13',
+ 'TJ': '85.9.128.0/18',
+ 'TK': '27.96.24.0/21',
+ 'TL': '180.189.160.0/20',
+ 'TM': '95.85.96.0/19',
+ 'TN': '197.0.0.0/11',
+ 'TO': '175.176.144.0/21',
+ 'TR': '78.160.0.0/11',
+ 'TT': '186.44.0.0/15',
+ 'TV': '202.2.96.0/19',
+ 'TW': '120.96.0.0/11',
+ 'TZ': '156.156.0.0/14',
+ 'UA': '37.52.0.0/14',
+ 'UG': '102.80.0.0/13',
+ 'US': '6.0.0.0/8',
+ 'UY': '167.56.0.0/13',
+ 'UZ': '84.54.64.0/18',
+ 'VA': '212.77.0.0/19',
+ 'VC': '207.191.240.0/21',
+ 'VE': '186.88.0.0/13',
+ 'VG': '66.81.192.0/20',
+ 'VI': '146.226.0.0/16',
+ 'VN': '14.160.0.0/11',
+ 'VU': '202.80.32.0/20',
+ 'WF': '117.20.32.0/21',
+ 'WS': '202.4.32.0/19',
+ 'YE': '134.35.0.0/16',
+ 'YT': '41.242.116.0/22',
+ 'ZA': '41.0.0.0/11',
+ 'ZM': '102.144.0.0/13',
+ 'ZW': '102.177.192.0/18',
+ }
+
+ @classmethod
+ def random_ipv4(cls, code_or_block):
+ if len(code_or_block) == 2:
+ block = cls._country_ip_map.get(code_or_block.upper())
+ if not block:
+ return None
+ else:
+ block = code_or_block
+ addr, preflen = block.split('/')
+ addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
+ addr_max = addr_min | (0xffffffff >> int(preflen))
+ return compat_str(socket.inet_ntoa(
+ compat_struct_pack('!L', random.randint(addr_min, addr_max))))
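+
+# Illustrative usage (the exact address returned is random within the block):
+#   GeoUtils.random_ipv4('DE')          # -> e.g. '53.17.42.9' (from 53.0.0.0/8)
+#   GeoUtils.random_ipv4('10.0.0.0/8')  # -> a random address in the given block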
+
+
+class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
+ def __init__(self, proxies=None):
+ # Set default handlers
+ for type in ('http', 'https'):
+ setattr(self, '%s_open' % type,
+ lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
+ meth(r, proxy, type))
+ compat_urllib_request.ProxyHandler.__init__(self, proxies)
+
+ def proxy_open(self, req, proxy, type):
+ req_proxy = req.headers.get('Ytdl-request-proxy')
+ if req_proxy is not None:
+ proxy = req_proxy
+ del req.headers['Ytdl-request-proxy']
+
+ if proxy == '__noproxy__':
+ return None # No Proxy
+ if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
+ req.add_header('Ytdl-socks-proxy', proxy)
+            # hypervideo's http/https handlers wrap the socket with SOCKS themselves
+ return None
+ return compat_urllib_request.ProxyHandler.proxy_open(
+ self, req, proxy, type)
+
+
+# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
+# released into the public domain
+# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
+
+def long_to_bytes(n, blocksize=0):
+ """long_to_bytes(n:long, blocksize:int) : string
+ Convert a long integer to a byte string.
+
+ If optional blocksize is given and greater than zero, pad the front of the
+ byte string with binary zeros so that the length is a multiple of
+ blocksize.
+ """
+ # after much testing, this algorithm was deemed to be the fastest
+ s = b''
+ n = int(n)
+ while n > 0:
+ s = compat_struct_pack('>I', n & 0xffffffff) + s
+ n = n >> 32
+ # strip off leading zeros
+ for i in range(len(s)):
+ if s[i] != b'\000'[0]:
+ break
+ else:
+ # only happens when n == 0
+ s = b'\000'
+ i = 0
+ s = s[i:]
+ # add back some pad bytes. this could be done more efficiently w.r.t. the
+ # de-padding being done above, but sigh...
+ if blocksize > 0 and len(s) % blocksize:
+ s = (blocksize - len(s) % blocksize) * b'\000' + s
+ return s
+
+
+def bytes_to_long(s):
+ """bytes_to_long(string) : long
+ Convert a byte string to a long integer.
+
+ This is (essentially) the inverse of long_to_bytes().
+ """
+ acc = 0
+ length = len(s)
+ if length % 4:
+ extra = (4 - length % 4)
+ s = b'\000' * extra + s
+ length = length + extra
+ for i in range(0, length, 4):
+ acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
+ return acc
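+
+# Illustrative round trip:
+#   long_to_bytes(65537, blocksize=4)   # -> b'\x00\x01\x00\x01'
+#   bytes_to_long(b'\x00\x01\x00\x01')  # -> 65537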
+
+
+def ohdave_rsa_encrypt(data, exponent, modulus):
+ '''
+ Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
+
+ Input:
+ data: data to encrypt, bytes-like object
+ exponent, modulus: parameter e and N of RSA algorithm, both integer
+ Output: hex string of encrypted data
+
+ Limitation: supports one block encryption only
+ '''
+
+ payload = int(binascii.hexlify(data[::-1]), 16)
+ encrypted = pow(payload, exponent, modulus)
+ return '%x' % encrypted
+
+
+def pkcs1pad(data, length):
+ """
+ Padding input data with PKCS#1 scheme
+
+ @param {int[]} data input data
+ @param {int} length target length
+ @returns {int[]} padded data
+ """
+ if len(data) > length - 11:
+ raise ValueError('Input data too long for PKCS#1 padding')
+
+ pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
+ return [0, 2] + pseudo_random + [0] + data
+
+
+def encode_base_n(num, n, table=None):
+ FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ if not table:
+ table = FULL_TABLE[:n]
+
+ if n > len(table):
+ raise ValueError('base %d exceeds table length %d' % (n, len(table)))
+
+ if num == 0:
+ return table[0]
+
+ ret = ''
+ while num:
+ ret = table[num % n] + ret
+ num = num // n
+ return ret
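+
+# Illustrative values:
+#   encode_base_n(255, 16)  # -> 'ff'
+#   encode_base_n(62, 62)   # -> '10'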
+
+
+def decode_packed_codes(code):
+ mobj = re.search(PACKED_CODES_RE, code)
+ obfuscated_code, base, count, symbols = mobj.groups()
+ base = int(base)
+ count = int(count)
+ symbols = symbols.split('|')
+ symbol_table = {}
+
+ while count:
+ count -= 1
+ base_n_count = encode_base_n(count, base)
+ symbol_table[base_n_count] = symbols[count] or base_n_count
+
+ return re.sub(
+ r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
+ obfuscated_code)
+
+
+def caesar(s, alphabet, shift):
+ if shift == 0:
+ return s
+ l = len(alphabet)
+ return ''.join(
+ alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
+ for c in s)
+
+
+def rot47(s):
+ return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
+
+
+def parse_m3u8_attributes(attrib):
+ info = {}
+ for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
+ if val.startswith('"'):
+ val = val[1:-1]
+ info[key] = val
+ return info
+
+
+def urshift(val, n):
+ return val >> n if val >= 0 else (val + 0x100000000) >> n
+
+
+# Based on png2str() written by @gdkchan and improved by @yokrysty
+# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
+def decode_png(png_data):
+ # Reference: https://www.w3.org/TR/PNG/
+ header = png_data[8:]
+
+ if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
+ raise IOError('Not a valid PNG file.')
+
+ int_map = {1: '>B', 2: '>H', 4: '>I'}
+ unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
+
+ chunks = []
+
+ while header:
+ length = unpack_integer(header[:4])
+ header = header[4:]
+
+ chunk_type = header[:4]
+ header = header[4:]
+
+ chunk_data = header[:length]
+ header = header[length:]
+
+ header = header[4:] # Skip CRC
+
+ chunks.append({
+ 'type': chunk_type,
+ 'length': length,
+ 'data': chunk_data
+ })
+
+ ihdr = chunks[0]['data']
+
+ width = unpack_integer(ihdr[:4])
+ height = unpack_integer(ihdr[4:8])
+
+ idat = b''
+
+ for chunk in chunks:
+ if chunk['type'] == b'IDAT':
+ idat += chunk['data']
+
+ if not idat:
+ raise IOError('Unable to read PNG data.')
+
+ decompressed_data = bytearray(zlib.decompress(idat))
+
+ stride = width * 3
+ pixels = []
+
+ def _get_pixel(idx):
+ x = idx % stride
+ y = idx // stride
+ return pixels[y][x]
+
+ for y in range(height):
+ basePos = y * (1 + stride)
+ filter_type = decompressed_data[basePos]
+
+ current_row = []
+
+ pixels.append(current_row)
+
+ for x in range(stride):
+ color = decompressed_data[1 + basePos + x]
+ basex = y * stride + x
+ left = 0
+ up = 0
+
+ if x > 2:
+ left = _get_pixel(basex - 3)
+ if y > 0:
+ up = _get_pixel(basex - stride)
+
+ if filter_type == 1: # Sub
+ color = (color + left) & 0xff
+ elif filter_type == 2: # Up
+ color = (color + up) & 0xff
+ elif filter_type == 3: # Average
+ color = (color + ((left + up) >> 1)) & 0xff
+ elif filter_type == 4: # Paeth
+ a = left
+ b = up
+ c = 0
+
+ if x > 2 and y > 0:
+ c = _get_pixel(basex - stride - 3)
+
+ p = a + b - c
+
+ pa = abs(p - a)
+ pb = abs(p - b)
+ pc = abs(p - c)
+
+ if pa <= pb and pa <= pc:
+ color = (color + a) & 0xff
+ elif pb <= pc:
+ color = (color + b) & 0xff
+ else:
+ color = (color + c) & 0xff
+
+ current_row.append(color)
+
+ return width, height, pixels
+
+
+def write_xattr(path, key, value):
+ # This mess below finds the best xattr tool for the job
+ try:
+ # try the pyxattr module...
+ import xattr
+
+ if hasattr(xattr, 'set'): # pyxattr
+ # Unicode arguments are not supported in python-pyxattr until
+ # version 0.5.0
+ # See https://github.com/ytdl-org/youtube-dl/issues/5498
+ pyxattr_required_version = '0.5.0'
+ if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
+ # TODO: fallback to CLI tools
+ raise XAttrUnavailableError(
+ 'python-pyxattr is detected but is too old. '
+ 'hypervideo requires %s or above while your version is %s. '
+ 'Falling back to other xattr implementations' % (
+ pyxattr_required_version, xattr.__version__))
+
+ setxattr = xattr.set
+ else: # xattr
+ setxattr = xattr.setxattr
+
+ try:
+ setxattr(path, key, value)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+
+ except ImportError:
+ if compat_os_name == 'nt':
+ # Write xattrs to NTFS Alternate Data Streams:
+ # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
+ assert ':' not in key
+ assert os.path.exists(path)
+
+ ads_fn = path + ':' + key
+ try:
+ with open(ads_fn, 'wb') as f:
+ f.write(value)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ else:
+ user_has_setfattr = check_executable('setfattr', ['--version'])
+ user_has_xattr = check_executable('xattr', ['-h'])
+
+ if user_has_setfattr or user_has_xattr:
+
+ value = value.decode('utf-8')
+ if user_has_setfattr:
+ executable = 'setfattr'
+ opts = ['-n', key, '-v', value]
+ elif user_has_xattr:
+ executable = 'xattr'
+ opts = ['-w', key, value]
+
+ cmd = ([encodeFilename(executable, True)]
+ + [encodeArgument(o) for o in opts]
+ + [encodeFilename(path, True)])
+
+ try:
+ p = subprocess.Popen(
+ cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ stdout, stderr = p.communicate()
+ stderr = stderr.decode('utf-8', 'replace')
+ if p.returncode != 0:
+ raise XAttrMetadataError(p.returncode, stderr)
+
+ else:
+                # On Unix, but we could not find pyxattr, setfattr, or xattr.
+ if sys.platform.startswith('linux'):
+ raise XAttrUnavailableError(
+ "Couldn't find a tool to set the xattrs. "
+ "Install either the python 'pyxattr' or 'xattr' "
+ "modules, or the GNU 'attr' package "
+ "(which contains the 'setfattr' tool).")
+ else:
+ raise XAttrUnavailableError(
+ "Couldn't find a tool to set the xattrs. "
+ "Install either the python 'xattr' module, "
+ "or the 'xattr' binary.")
+
+
+def random_birthday(year_field, month_field, day_field):
+ start_date = datetime.date(1950, 1, 1)
+ end_date = datetime.date(1995, 12, 31)
+ offset = random.randint(0, (end_date - start_date).days)
+ random_date = start_date + datetime.timedelta(offset)
+ return {
+ year_field: str(random_date.year),
+ month_field: str(random_date.month),
+ day_field: str(random_date.day),
+ }
+
+
+def clean_podcast_url(url):
+ return re.sub(r'''(?x)
+ (?:
+ (?:
+ chtbl\.com/track|
+ media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
+ play\.podtrac\.com
+ )/[^/]+|
+ (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
+ flex\.acast\.com|
+ pd(?:
+ cn\.co| # https://podcorn.com/analytics-prefix/
+ st\.fm # https://podsights.com/docs/
+ )/e
+ )/''', '', url)
diff --git a/hypervideo_dl/version.py b/hypervideo_dl/version.py
new file mode 100644
index 0000000..4b768be
--- /dev/null
+++ b/hypervideo_dl/version.py
@@ -0,0 +1,3 @@
+from __future__ import unicode_literals
+
+__version__ = '1.1.11'
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..6f03c9b
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,6 @@
+[wheel]
+universal = True
+
+[flake8]
+exclude = hypervideo_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
+ignore = E402,E501,E731,E741,W503
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..cb9de29
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import print_function
+
+import os.path
+import warnings
+import sys
+
+try:
+ from setuptools import setup, Command
+ setuptools_available = True
+except ImportError:
+ from distutils.core import setup, Command
+ setuptools_available = False
+from distutils.spawn import spawn
+
+try:
+ # This will create an exe that needs Microsoft Visual C++ 2008
+ # Redistributable Package
+ import py2exe
+except ImportError:
+ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
+ print('Cannot import py2exe', file=sys.stderr)
+ exit(1)
+
+py2exe_options = {
+ 'bundle_files': 1,
+ 'compressed': 1,
+ 'optimize': 2,
+ 'dist_dir': '.',
+ 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
+}
+
+# Get the version from hypervideo_dl/version.py without importing the package
+exec(compile(open('hypervideo_dl/version.py').read(),
+ 'hypervideo_dl/version.py', 'exec'))
+
+DESCRIPTION = 'YouTube video downloader'
+LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
+
+py2exe_console = [{
+ 'script': './hypervideo_dl/__main__.py',
+ 'dest_base': 'hypervideo',
+ 'version': __version__,
+ 'description': DESCRIPTION,
+ 'comments': LONG_DESCRIPTION,
+ 'product_name': 'hypervideo',
+ 'product_version': __version__,
+}]
+
+py2exe_params = {
+ 'console': py2exe_console,
+ 'options': {'py2exe': py2exe_options},
+ 'zipfile': None
+}
+
+if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
+ params = py2exe_params
+else:
+ files_spec = [
+ ('etc/bash_completion.d', ['hypervideo.bash-completion']),
+ ('etc/fish/completions', ['hypervideo.fish']),
+ ('share/doc/hypervideo_dl', ['README.txt']),
+ ('share/man/man1', ['hypervideo.1'])
+ ]
+ root = os.path.dirname(os.path.abspath(__file__))
+ data_files = []
+ for dirname, files in files_spec:
+ resfiles = []
+ for fn in files:
+ if not os.path.exists(fn):
+ warnings.warn('Skipping file %s since it is not present. Type make to build all automatically generated files.' % fn)
+ else:
+ resfiles.append(fn)
+ data_files.append((dirname, resfiles))
+
+ params = {
+ 'data_files': data_files,
+ }
+ if setuptools_available:
+ params['entry_points'] = {'console_scripts': ['hypervideo = hypervideo_dl:main']}
+ else:
+ params['scripts'] = ['bin/hypervideo']
+
+class build_lazy_extractors(Command):
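+ # Invoked as `python setup.py build_lazy_extractors` (registered via
+ # cmdclass below); regenerates hypervideo_dl/extractor/lazy_extractors.py.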
+ description = 'Build the extractor lazy loading module'
+ user_options = []
+
+ def initialize_options(self):
+ pass
+
+ def finalize_options(self):
+ pass
+
+ def run(self):
+ spawn(
+ [sys.executable, 'devscripts/make_lazy_extractors.py', 'hypervideo_dl/extractor/lazy_extractors.py'],
+ dry_run=self.dry_run,
+ )
+
+setup(
+ name='hypervideo_dl',
+ version=__version__,
+ description=DESCRIPTION,
+ long_description=LONG_DESCRIPTION,
+ url='https://git.conocimientoslibres.ga/software/hypervideo.git',
+ author='Ricardo Garcia',
+ author_email='ytdl@yt-dl.org',
+ maintainer='Jesús E.',
+ maintainer_email='heckyel@hyperbola.info',
+ license='CC0-1.0',
+ packages=[
+ 'hypervideo_dl',
+ 'hypervideo_dl.extractor', 'hypervideo_dl.downloader',
+ 'hypervideo_dl.postprocessor'],
+
+ # Provokes warning on most systems (why?!)
+ # test_suite = 'nose.collector',
+ # test_requires = ['nosetest'],
+
+ classifiers=[
+ 'Topic :: Multimedia :: Video',
+ 'Development Status :: 5 - Production/Stable',
+ 'Environment :: Console',
+ 'License :: Public Domain',
+ 'Programming Language :: Python',
+ 'Programming Language :: Python :: 2',
+ 'Programming Language :: Python :: 2.6',
+ 'Programming Language :: Python :: 2.7',
+ 'Programming Language :: Python :: 3',
+ 'Programming Language :: Python :: 3.2',
+ 'Programming Language :: Python :: 3.3',
+ 'Programming Language :: Python :: 3.4',
+ 'Programming Language :: Python :: 3.5',
+ 'Programming Language :: Python :: 3.6',
+ 'Programming Language :: Python :: 3.7',
+ 'Programming Language :: Python :: 3.8',
+ 'Programming Language :: Python :: Implementation',
+ 'Programming Language :: Python :: Implementation :: CPython',
+ 'Programming Language :: Python :: Implementation :: IronPython',
+ 'Programming Language :: Python :: Implementation :: Jython',
+ 'Programming Language :: Python :: Implementation :: PyPy',
+ ],
+
+ cmdclass={'build_lazy_extractors': build_lazy_extractors},
+ **params
+)
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/__init__.py
diff --git a/test/helper.py b/test/helper.py
new file mode 100644
index 0000000..6eb9298
--- /dev/null
+++ b/test/helper.py
@@ -0,0 +1,282 @@
+from __future__ import unicode_literals
+
+import errno
+import io
+import hashlib
+import json
+import os.path
+import re
+import types
+import ssl
+import sys
+
+import hypervideo_dl.extractor
+from hypervideo_dl import YoutubeDL
+from hypervideo_dl.compat import (
+ compat_os_name,
+ compat_str,
+)
+from hypervideo_dl.utils import (
+ preferredencoding,
+ write_string,
+)
+
+
+def get_params(override=None):
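+ # Load test/parameters.json, overlay local_parameters.json when present,
+ # then apply the caller's override, e.g. get_params({'format': 'worst'})
+ # (illustrative override).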
+ PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+ "parameters.json")
+ LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+ "local_parameters.json")
+ with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
+ parameters = json.load(pf)
+ if os.path.exists(LOCAL_PARAMETERS_FILE):
+ with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
+ parameters.update(json.load(pf))
+ if override:
+ parameters.update(override)
+ return parameters
+
+
+def try_rm(filename):
+ """ Remove a file if it exists """
+ try:
+ os.remove(filename)
+ except OSError as ose:
+ if ose.errno != errno.ENOENT:
+ raise
+
+
+def report_warning(message):
+ '''
+ Print the message to stderr; it will be prefixed with 'WARNING:'.
+ If stderr is a tty, the 'WARNING:' prefix will be colored.
+ '''
+ if sys.stderr.isatty() and compat_os_name != 'nt':
+ _msg_header = '\033[0;33mWARNING:\033[0m'
+ else:
+ _msg_header = 'WARNING:'
+ output = '%s %s\n' % (_msg_header, message)
+ if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
+ output = output.encode(preferredencoding())
+ sys.stderr.write(output)
+
+
+class FakeYDL(YoutubeDL):
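+ # A stub downloader for tests: to_screen() prints, trouble() raises, and
+ # download() just records its argument in self.result. Typical use
+ # (illustrative): ie = SomeIE(FakeYDL({'quiet': False})).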
+ def __init__(self, override=None):
+ # Different instances of the downloader can't share the same dictionary:
+ # some tests set the "sublang" parameter, which would break the md5 checks.
+ params = get_params(override=override)
+ super(FakeYDL, self).__init__(params, auto_init=False)
+ self.result = []
+
+ def to_screen(self, s, skip_eol=None):
+ print(s)
+
+ def trouble(self, s, tb=None):
+ raise Exception(s)
+
+ def download(self, x):
+ self.result.append(x)
+
+ def expect_warning(self, regex):
+ # Silence an expected warning matching a regex
+ old_report_warning = self.report_warning
+
+ def report_warning(self, message):
+ if re.match(regex, message):
+ return
+ old_report_warning(message)
+ self.report_warning = types.MethodType(report_warning, self)
+
+
+def gettestcases(include_onlymatching=False):
+ for ie in hypervideo_dl.extractor.gen_extractors():
+ for tc in ie.get_testcases(include_onlymatching):
+ yield tc
+
+
+md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
+
+
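+# Compare a value from an extractor result against an expected value from a
+# test definition. String expectations support the prefixes 're:',
+# 'startswith:', 'contains:', 'md5:' and '(min|max)count:N', e.g.
+# (illustrative): expect_value(self, 'BaW_jenozKc', 're:^[\w-]{11}$', 'id')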
+def expect_value(self, got, expected, field):
+ if isinstance(expected, compat_str) and expected.startswith('re:'):
+ match_str = expected[len('re:'):]
+ match_rex = re.compile(match_str)
+
+ self.assertTrue(
+ isinstance(got, compat_str),
+ 'Expected a %s object, but got %s for field %s' % (
+ compat_str.__name__, type(got).__name__, field))
+ self.assertTrue(
+ match_rex.match(got),
+ 'field %s (value: %r) should match %r' % (field, got, match_str))
+ elif isinstance(expected, compat_str) and expected.startswith('startswith:'):
+ start_str = expected[len('startswith:'):]
+ self.assertTrue(
+ isinstance(got, compat_str),
+ 'Expected a %s object, but got %s for field %s' % (
+ compat_str.__name__, type(got).__name__, field))
+ self.assertTrue(
+ got.startswith(start_str),
+ 'field %s (value: %r) should start with %r' % (field, got, start_str))
+ elif isinstance(expected, compat_str) and expected.startswith('contains:'):
+ contains_str = expected[len('contains:'):]
+ self.assertTrue(
+ isinstance(got, compat_str),
+ 'Expected a %s object, but got %s for field %s' % (
+ compat_str.__name__, type(got).__name__, field))
+ self.assertTrue(
+ contains_str in got,
+ 'field %s (value: %r) should contain %r' % (field, got, contains_str))
+ elif isinstance(expected, type):
+ self.assertTrue(
+ isinstance(got, expected),
+ 'Expected type %r for field %s, but got value %r of type %r' % (expected, field, got, type(got)))
+ elif isinstance(expected, dict) and isinstance(got, dict):
+ expect_dict(self, got, expected)
+ elif isinstance(expected, list) and isinstance(got, list):
+ self.assertEqual(
+ len(expected), len(got),
+ 'Expected a list of length %d, but got a list of length %d for field %s' % (
+ len(expected), len(got), field))
+ for index, (item_got, item_expected) in enumerate(zip(got, expected)):
+ type_got = type(item_got)
+ type_expected = type(item_expected)
+ self.assertEqual(
+ type_expected, type_got,
+ 'Type mismatch for list item at index %d for field %s, expected %r, got %r' % (
+ index, field, type_expected, type_got))
+ expect_value(self, item_got, item_expected, field)
+ else:
+ if isinstance(expected, compat_str) and expected.startswith('md5:'):
+ self.assertTrue(
+ isinstance(got, compat_str),
+ 'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got)))
+ got = 'md5:' + md5(got)
+ elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected):
+ self.assertTrue(
+ isinstance(got, (list, dict)),
+ 'Expected field %s to be a list or a dict, but it is of type %s' % (
+ field, type(got).__name__))
+ op, _, expected_num = expected.partition(':')
+ expected_num = int(expected_num)
+ if op == 'mincount':
+ assert_func = assertGreaterEqual
+ msg_tmpl = 'Expected %d items in field %s, but only got %d'
+ elif op == 'maxcount':
+ assert_func = assertLessEqual
+ msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
+ elif op == 'count':
+ assert_func = assertEqual
+ msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
+ else:
+ assert False
+ assert_func(
+ self, len(got), expected_num,
+ msg_tmpl % (expected_num, field, len(got)))
+ return
+ self.assertEqual(
+ expected, got,
+ 'Invalid value for field %s, expected %r, got %r' % (field, expected, got))
+
+
+def expect_dict(self, got_dict, expected_dict):
+ for info_field, expected in expected_dict.items():
+ got = got_dict.get(info_field)
+ expect_value(self, got, expected, info_field)
+
+
+def expect_info_dict(self, got_dict, expected_dict):
+ expect_dict(self, got_dict, expected_dict)
+ # Check for the presence of mandatory fields
+ if got_dict.get('_type') not in ('playlist', 'multi_video'):
+ for key in ('id', 'url', 'title', 'ext'):
+ self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+ # Check for mandatory fields that are automatically set by YoutubeDL
+ for key in ['webpage_url', 'extractor', 'extractor_key']:
+ self.assertTrue(got_dict.get(key), 'Missing field: %s' % key)
+
+ # Are checkable fields missing from the test case definition?
+ test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
+ for key, value in got_dict.items()
+ if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location', 'age_limit'))
+ missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
+ if missing_keys:
+ def _repr(v):
+ if isinstance(v, compat_str):
+ return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
+ else:
+ return repr(v)
+ info_dict_str = ''
+ if len(missing_keys) != len(expected_dict):
+ info_dict_str += ''.join(
+ ' %s: %s,\n' % (_repr(k), _repr(v))
+ for k, v in test_info_dict.items() if k not in missing_keys)
+
+ if info_dict_str:
+ info_dict_str += '\n'
+ info_dict_str += ''.join(
+ ' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
+ for k in missing_keys)
+ write_string(
+ '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr)
+ self.assertFalse(
+ missing_keys,
+ 'Missing keys in test definition: %s' % (
+ ', '.join(sorted(missing_keys))))
+
+
+def assertRegexpMatches(self, text, regexp, msg=None):
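+ # Compat shim: delegate to the TestCase's own regexp assertion when it
+ # exists, otherwise emulate it with re.match.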
+ if hasattr(self, 'assertRegexp'):
+ return self.assertRegexp(text, regexp, msg)
+ else:
+ m = re.match(regexp, text)
+ if not m:
+ note = 'Regexp didn\'t match: %r not found' % (regexp)
+ if len(text) < 1000:
+ note += ' in %r' % text
+ if msg is None:
+ msg = note
+ else:
+ msg = note + ', ' + msg
+ self.assertTrue(m, msg)
+
+
+def assertGreaterEqual(self, got, expected, msg=None):
+ if not (got >= expected):
+ if msg is None:
+ msg = '%r not greater than or equal to %r' % (got, expected)
+ self.assertTrue(got >= expected, msg)
+
+
+def assertLessEqual(self, got, expected, msg=None):
+ if not (got <= expected):
+ if msg is None:
+ msg = '%r not less than or equal to %r' % (got, expected)
+ self.assertTrue(got <= expected, msg)
+
+
+def assertEqual(self, got, expected, msg=None):
+ if not (got == expected):
+ if msg is None:
+ msg = '%r not equal to %r' % (got, expected)
+ self.assertTrue(got == expected, msg)
+
+
+def expect_warnings(ydl, warnings_re):
+ real_warning = ydl.report_warning
+
+ def _report_warning(w):
+ if not any(re.search(w_re, w) for w_re in warnings_re):
+ real_warning(w)
+
+ ydl.report_warning = _report_warning
+
+
+def http_server_port(httpd):
+ if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
+ # In Jython SSLSocket is not a subclass of socket.socket
+ sock = httpd.socket.sock
+ else:
+ sock = httpd.socket
+ return sock.getsockname()[1]
diff --git a/test/parameters.json b/test/parameters.json
new file mode 100644
index 0000000..65fd544
--- /dev/null
+++ b/test/parameters.json
@@ -0,0 +1,43 @@
+{
+ "consoletitle": false,
+ "continuedl": true,
+ "forcedescription": false,
+ "forcefilename": false,
+ "forceformat": false,
+ "forcethumbnail": false,
+ "forcetitle": false,
+ "forceurl": false,
+ "format": "best",
+ "ignoreerrors": false,
+ "listformats": null,
+ "logtostderr": false,
+ "matchtitle": null,
+ "max_downloads": null,
+ "nooverwrites": false,
+ "nopart": false,
+ "noprogress": false,
+ "outtmpl": "%(id)s.%(ext)s",
+ "password": null,
+ "playlistend": -1,
+ "playliststart": 1,
+ "prefer_free_formats": false,
+ "quiet": false,
+ "ratelimit": null,
+ "rejecttitle": null,
+ "retries": 10,
+ "simulate": false,
+ "subtitleslang": null,
+ "subtitlesformat": "best",
+ "test": true,
+ "updatetime": true,
+ "usenetrc": false,
+ "username": null,
+ "verbose": true,
+ "writedescription": false,
+ "writeinfojson": true,
+ "writesubtitles": false,
+ "allsubtitles": false,
+ "listsubtitles": false,
+ "socket_timeout": 20,
+ "fixup": "never"
+}
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
new file mode 100644
index 0000000..5029072
--- /dev/null
+++ b/test/test_InfoExtractor.py
@@ -0,0 +1,1132 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import io
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
+from hypervideo_dl.compat import compat_etree_fromstring, compat_http_server
+from hypervideo_dl.extractor.common import InfoExtractor
+from hypervideo_dl.extractor import YoutubeIE, get_info_extractor
+from hypervideo_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
+import threading
+
+
+TEAPOT_RESPONSE_STATUS = 418
+TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
+
+
+class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ def log_message(self, format, *args):
+ pass
+
+ def do_GET(self):
+ if self.path == '/teapot':
+ self.send_response(TEAPOT_RESPONSE_STATUS)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.end_headers()
+ self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
+ else:
+ assert False
+
+
+class TestIE(InfoExtractor):
+ pass
+
+
+class TestInfoExtractor(unittest.TestCase):
+ def setUp(self):
+ self.ie = TestIE(FakeYDL())
+
+ def test_ie_key(self):
+ self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
+
+ def test_html_search_regex(self):
+ html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
+ search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
+ self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video')
+
+ def test_opengraph(self):
+ ie = self.ie
+ html = '''
+ <meta name="og:title" content='Foo'/>
+ <meta content="Some video's description " name="og:description"/>
+ <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&amp;key2=val2'/>
+ <meta content='application/x-shockwave-flash' property='og:video:type'>
+ <meta content='Foo' property=og:foobar>
+ <meta name="og:test1" content='foo > < bar'/>
+ <meta name="og:test2" content="foo >//< bar"/>
+ <meta property=og-test3 content='Ill-formatted opengraph'/>
+ '''
+ self.assertEqual(ie._og_search_title(html), 'Foo')
+ self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
+ self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
+ self.assertEqual(ie._og_search_video_url(html, default=None), None)
+ self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
+ self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
+ self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
+ self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph')
+ self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
+ self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
+ self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
+
+ def test_html_search_meta(self):
+ ie = self.ie
+ html = '''
+ <meta name="a" content="1" />
+ <meta name='b' content='2'>
+ <meta name="c" content='3'>
+ <meta name=d content='4'>
+ <meta property="e" content='5' >
+ <meta content="6" name="f">
+ '''
+
+ self.assertEqual(ie._html_search_meta('a', html), '1')
+ self.assertEqual(ie._html_search_meta('b', html), '2')
+ self.assertEqual(ie._html_search_meta('c', html), '3')
+ self.assertEqual(ie._html_search_meta('d', html), '4')
+ self.assertEqual(ie._html_search_meta('e', html), '5')
+ self.assertEqual(ie._html_search_meta('f', html), '6')
+ self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1')
+ self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3')
+ self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3')
+ self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
+ self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
+
+ def test_search_json_ld_realworld(self):
+ # https://github.com/ytdl-org/youtube-dl/issues/23306
+ expect_dict(
+ self,
+ self.ie._search_json_ld(r'''<script type="application/ld+json">
+{
+"@context": "http://schema.org/",
+"@type": "VideoObject",
+"name": "1 On 1 With Kleio",
+"url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/",
+"duration": "PT0H12M23S",
+"thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"],
+"contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4",
+"embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/",
+"image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
+"width": "1920",
+"height": "1080",
+"encodingFormat": "mp4",
+"bitrate": "6617kbps",
+"isFamilyFriendly": "False",
+"description": "Kleio Valentien",
+"uploadDate": "2015-12-05T21:24:35+01:00",
+"interactionStatistic": {
+"@type": "InteractionCounter",
+"interactionType": { "@type": "http://schema.org/WatchAction" },
+"userInteractionCount": 1120958
+}, "aggregateRating": {
+"@type": "AggregateRating",
+"ratingValue": "88",
+"ratingCount": "630",
+"bestRating": "100",
+"worstRating": "0"
+}, "actor": [{
+"@type": "Person",
+"name": "Kleio Valentien",
+"url": "https://www.eporner.com/pornstar/kleio-valentien/"
+}]}
+</script>''', None),
+ {
+ 'title': '1 On 1 With Kleio',
+ 'description': 'Kleio Valentien',
+ 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
+ 'timestamp': 1449347075,
+ 'duration': 743.0,
+ 'view_count': 1120958,
+ 'width': 1920,
+ 'height': 1080,
+ })
+
+ def test_download_json(self):
+ uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
+ self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'})
+ uri = encode_data_uri(b'callback({"foo": "blah"})', 'application/javascript')
+ self.assertEqual(self.ie._download_json(uri, None, transform_source=strip_jsonp), {'foo': 'blah'})
+ uri = encode_data_uri(b'{"foo": invalid}', 'application/json')
+ self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
+ self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
+
+ def test_parse_html5_media_entries(self):
+ # inline video tag
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://127.0.0.1/video.html',
+ r'<html><video src="/vid.mp4" /></html>', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://127.0.0.1/vid.mp4',
+ }],
+ })
+
+ # from https://www.r18.com/
+ # with kbps in label
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.r18.com/',
+ r'''
+ <video id="samplevideo_amateur" class="js-samplevideo video-js vjs-default-skin vjs-big-play-centered" controls preload="auto" width="400" height="225" poster="//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg">
+ <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4" type="video/mp4" res="240" label="300kbps">
+ <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4" type="video/mp4" res="480" label="1000kbps">
+ <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4" type="video/mp4" res="740" label="1500kbps">
+ <p>Your browser does not support the video tag.</p>
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4',
+ 'ext': 'mp4',
+ 'format_id': '300kbps',
+ 'height': 240,
+ 'tbr': 300,
+ }, {
+ 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4',
+ 'ext': 'mp4',
+ 'format_id': '1000kbps',
+ 'height': 480,
+ 'tbr': 1000,
+ }, {
+ 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4',
+ 'ext': 'mp4',
+ 'format_id': '1500kbps',
+ 'height': 740,
+ 'tbr': 1500,
+ }],
+ 'thumbnail': '//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg'
+ })
+
+ # from https://www.csfd.cz/
+ # with width and height
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.csfd.cz/',
+ r'''
+ <video width="770" height="328" preload="none" controls poster="https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360" >
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4" type="video/mp4" width="640" height="360">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4" type="video/mp4" width="1280" height="720">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4" type="video/mp4" width="1920" height="1080">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm" type="video/webm" width="640" height="360">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm" type="video/webm" width="1280" height="720">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm" type="video/webm" width="1920" height="1080">
+ <track src="https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt" type="text/x-srt" kind="subtitles" srclang="cs" label="cs">
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4',
+ 'ext': 'mp4',
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4',
+ 'ext': 'mp4',
+ 'width': 1280,
+ 'height': 720,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4',
+ 'ext': 'mp4',
+ 'width': 1920,
+ 'height': 1080,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm',
+ 'ext': 'webm',
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm',
+ 'ext': 'webm',
+ 'width': 1280,
+ 'height': 720,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm',
+ 'ext': 'webm',
+ 'width': 1920,
+ 'height': 1080,
+ }],
+ 'subtitles': {
+ 'cs': [{'url': 'https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt'}]
+ },
+ 'thumbnail': 'https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360'
+ })
+
+ # from https://tamasha.com/v/Kkdjw
+ # with height in label
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://tamasha.com/v/Kkdjw',
+ r'''
+ <video crossorigin="anonymous">
+ <source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4" label="AUTO" res="0"/>
+ <source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4"
+ label="240p" res="240"/>
+ <source src="https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4" type="video/mp4"
+ label="144p" res="144"/>
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4',
+ }, {
+ 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4',
+ 'ext': 'mp4',
+ 'format_id': '240p',
+ 'height': 240,
+ }, {
+ 'url': 'https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4',
+ 'ext': 'mp4',
+ 'format_id': '144p',
+ 'height': 144,
+ }]
+ })
+
+ # from https://www.directvnow.com
+ # with data-src
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.directvnow.com',
+ r'''
+ <video id="vid1" class="header--video-masked active" muted playsinline>
+ <source data-src="https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4" type="video/mp4" />
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'ext': 'mp4',
+ 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4',
+ }]
+ })
+
+ # from https://www.klarna.com/uk/
+ # with data-video-src
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.directvnow.com',
+ r'''
+ <video loop autoplay muted class="responsive-video block-kl__video video-on-medium">
+ <source src="" data-video-desktop data-video-src="https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4" type="video/mp4" />
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4',
+ 'ext': 'mp4',
+ }],
+ })
+
+ def test_extract_jwplayer_data_realworld(self):
+ # from http://www.suffolk.edu/sjc/
+ expect_dict(
+ self,
+ self.ie._extract_jwplayer_data(r'''
+ <script type='text/javascript'>
+ jwplayer('my-video').setup({
+ file: 'rtmp://192.138.214.154/live/sjclive',
+ fallback: 'true',
+ width: '95%',
+ aspectratio: '16:9',
+ primary: 'flash',
+ mediaid:'XEgvuql4'
+ });
+ </script>
+ ''', None, require_title=False),
+ {
+ 'id': 'XEgvuql4',
+ 'formats': [{
+ 'url': 'rtmp://192.138.214.154/live/sjclive',
+ 'ext': 'flv'
+ }]
+ })
+
+ # from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/
+ expect_dict(
+ self,
+ self.ie._extract_jwplayer_data(r'''
+<script type="text/javascript">
+ jwplayer("mediaplayer").setup({
+ 'videoid': "7564",
+ 'width': "100%",
+ 'aspectratio': "16:9",
+ 'stretching': "exactfit",
+ 'autostart': 'false',
+ 'flashplayer': "https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf",
+ 'file': "https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv",
+ 'image': "https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg",
+ 'filefallback': "https://cdn.pornoxo.com/key=9ZPsTR5EvPLQrBaak2MUGA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/m_4b2157147afe5efa93ce1978e0265289c193874e02597.mp4",
+ 'logo.hide': true,
+ 'skin': "https://t04.vipstreamservice.com/jwplayer/skin/modieus-blk.zip",
+ 'plugins': "https://t04.vipstreamservice.com/jwplayer/dock/dockableskinnableplugin.swf",
+ 'dockableskinnableplugin.piclink': "/index.php?key=ajax-videothumbsn&vid=7564&data=2009-12--14--4b2157147afe5efa93ce1978e0265289c193874e02597.flv--17370",
+ 'controlbar': 'bottom',
+ 'modes': [
+ {type: 'flash', src: 'https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf'}
+ ],
+ 'provider': 'http'
+ });
+ //noinspection JSAnnotator
+ invideo.setup({
+ adsUrl: "/banner-iframe/?zoneId=32",
+ adsUrl2: "",
+ autostart: false
+ });
+</script>
+ ''', 'dummy', require_title=False),
+ {
+ 'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg',
+ 'formats': [{
+ 'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv',
+ 'ext': 'flv'
+ }]
+ })
+
+ # from http://www.indiedb.com/games/king-machine/videos
+ expect_dict(
+ self,
+ self.ie._extract_jwplayer_data(r'''
+<script>
+jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/\/www.indiedb.com\/","displaytitle":false,"autostart":false,"repeat":false,"title":"king machine trailer 1","sharing":{"link":"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1","code":"<iframe width=\"560\" height=\"315\" src=\"http:\/\/www.indiedb.com\/media\/iframe\/1522983\" frameborder=\"0\" allowfullscreen><\/iframe><br><a href=\"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1\">king machine trailer 1 - Indie DB<\/a>"},"related":{"file":"http:\/\/rss.indiedb.com\/media\/recommended\/1522983\/feed\/rss.xml","dimensions":"160x120","onclick":"link"},"sources":[{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode_mp4\/king-machine-trailer.mp4","label":"360p SD","default":"true"},{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode720p_mp4\/king-machine-trailer.mp4","label":"720p HD"}],"image":"http:\/\/media.indiedb.com\/cache\/images\/games\/1\/50\/49678\/thumb_620x2000\/king-machine-trailer.mp4.jpg","advertising":{"client":"vast","tag":"http:\/\/ads.intergi.com\/adrawdata\/3.0\/5205\/4251742\/0\/1013\/ADTECH;cors=yes;width=560;height=315;referring_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;content_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;media_id=1522983;title=king+machine+trailer+1;device=__DEVICE__;model=__MODEL__;os=Windows+OS;osversion=__OSVERSION__;ua=__UA__;ip=109.171.17.81;uniqueid=1522983;tags=__TAGS__;number=58cac25928151;time=1489683033"},"width":620,"height":349}).once("play", function(event) {
+ videoAnalytics("play");
+}).once("complete", function(event) {
+ videoAnalytics("completed");
+});
+</script>
+ ''', 'dummy'),
+ {
+ 'title': 'king machine trailer 1',
+ 'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg',
+ 'formats': [{
+ 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4',
+ 'height': 360,
+ 'ext': 'mp4'
+ }, {
+ 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4',
+ 'height': 720,
+ 'ext': 'mp4'
+ }]
+ })
+
+ def test_parse_m3u8_formats(self):
+ _TEST_CASES = [
+ (
+ # https://github.com/ytdl-org/youtube-dl/issues/11507
+ # http://pluzz.francetv.fr/videos/le_ministere.html
+ 'pluzz_francetv_11507',
+ 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+ [{
+ 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0',
+ 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+ 'ext': 'mp4',
+ 'format_id': '180',
+ 'protocol': 'm3u8',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc1.66.30',
+ 'tbr': 180,
+ 'width': 256,
+ 'height': 144,
+ }, {
+ 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0',
+ 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+ 'ext': 'mp4',
+ 'format_id': '303',
+ 'protocol': 'm3u8',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc1.66.30',
+ 'tbr': 303,
+ 'width': 320,
+ 'height': 180,
+ }, {
+ 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0',
+ 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+ 'ext': 'mp4',
+ 'format_id': '575',
+ 'protocol': 'm3u8',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc1.66.30',
+ 'tbr': 575,
+ 'width': 512,
+ 'height': 288,
+ }, {
+ 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0',
+ 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+ 'ext': 'mp4',
+ 'format_id': '831',
+ 'protocol': 'm3u8',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc1.77.30',
+ 'tbr': 831,
+ 'width': 704,
+ 'height': 396,
+ }, {
+ 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0',
+ 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8',
+ 'format_id': '1467',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc1.77.30',
+ 'tbr': 1467,
+ 'width': 1024,
+ 'height': 576,
+ }]
+ ),
+ (
+ # https://github.com/ytdl-org/youtube-dl/issues/11995
+ # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor
+ 'teamcoco_11995',
+ 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ [{
+ 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8',
+ 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ 'ext': 'mp4',
+ 'format_id': 'audio-0-Default',
+ 'protocol': 'm3u8',
+ 'vcodec': 'none',
+ }, {
+ 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
+ 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ 'ext': 'mp4',
+ 'format_id': 'audio-1-Default',
+ 'protocol': 'm3u8',
+ 'vcodec': 'none',
+ }, {
+ 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
+ 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ 'ext': 'mp4',
+ 'format_id': '71',
+ 'protocol': 'm3u8',
+ 'acodec': 'mp4a.40.5',
+ 'vcodec': 'none',
+ 'tbr': 71,
+ }, {
+ 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
+ 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ 'ext': 'mp4',
+ 'format_id': '413',
+ 'protocol': 'm3u8',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.42001e',
+ 'tbr': 413,
+ 'width': 400,
+ 'height': 224,
+ }, {
+ 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
+ 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ 'ext': 'mp4',
+ 'format_id': '522',
+ 'protocol': 'm3u8',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.42001e',
+ 'tbr': 522,
+ 'width': 400,
+ 'height': 224,
+ }, {
+ 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-1m_v4.m3u8',
+ 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ 'ext': 'mp4',
+ 'format_id': '1205',
+ 'protocol': 'm3u8',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d001e',
+ 'tbr': 1205,
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-2m_v4.m3u8',
+ 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ 'ext': 'mp4',
+ 'format_id': '2374',
+ 'protocol': 'm3u8',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d001f',
+ 'tbr': 2374,
+ 'width': 1024,
+ 'height': 576,
+ }]
+ ),
+ (
+ # https://github.com/ytdl-org/youtube-dl/issues/12211
+ # http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601
+ 'toggle_mobile_12211',
+ 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+ [{
+ 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8',
+ 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+ 'ext': 'mp4',
+ 'format_id': 'audio-English',
+ 'protocol': 'm3u8',
+ 'language': 'eng',
+ 'vcodec': 'none',
+ }, {
+ 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8',
+ 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+ 'ext': 'mp4',
+ 'format_id': 'audio-Undefined',
+ 'protocol': 'm3u8',
+ 'language': 'und',
+ 'vcodec': 'none',
+ }, {
+ 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8',
+ 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+ 'ext': 'mp4',
+ 'format_id': '155',
+ 'protocol': 'm3u8',
+ 'tbr': 155.648,
+ 'width': 320,
+ 'height': 180,
+ }, {
+ 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8',
+ 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+ 'ext': 'mp4',
+ 'format_id': '502',
+ 'protocol': 'm3u8',
+ 'tbr': 502.784,
+ 'width': 480,
+ 'height': 270,
+ }, {
+ 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8',
+ 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+ 'ext': 'mp4',
+ 'format_id': '827',
+ 'protocol': 'm3u8',
+ 'tbr': 827.392,
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8',
+ 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+ 'ext': 'mp4',
+ 'format_id': '1396',
+ 'protocol': 'm3u8',
+ 'tbr': 1396.736,
+ 'width': 854,
+ 'height': 480,
+ }]
+ ),
+ (
+ # http://www.twitch.tv/riotgames/v/6528877
+ 'twitch_vod',
+ 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+ [{
+ 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8',
+ 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+ 'ext': 'mp4',
+ 'format_id': 'Audio Only',
+ 'protocol': 'm3u8',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'none',
+ 'tbr': 182.725,
+ }, {
+ 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8',
+ 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+ 'ext': 'mp4',
+ 'format_id': 'Mobile',
+ 'protocol': 'm3u8',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc1.42C00D',
+ 'tbr': 280.474,
+ 'width': 400,
+ 'height': 226,
+ }, {
+ 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8',
+ 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+ 'ext': 'mp4',
+ 'format_id': 'Low',
+ 'protocol': 'm3u8',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc1.42C01E',
+ 'tbr': 628.347,
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8',
+ 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+ 'ext': 'mp4',
+ 'format_id': 'Medium',
+ 'protocol': 'm3u8',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc1.42C01E',
+ 'tbr': 893.387,
+ 'width': 852,
+ 'height': 480,
+ }, {
+ 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8',
+ 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+ 'ext': 'mp4',
+ 'format_id': 'High',
+ 'protocol': 'm3u8',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc1.42C01F',
+ 'tbr': 1603.789,
+ 'width': 1280,
+ 'height': 720,
+ }, {
+ 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8',
+ 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+ 'ext': 'mp4',
+ 'format_id': 'Source',
+ 'protocol': 'm3u8',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc1.100.31',
+ 'tbr': 3214.134,
+ 'width': 1280,
+ 'height': 720,
+ }]
+ ),
+ (
+ # http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
+ # EXT-X-STREAM-INF tag with a NAME attribute that is not defined
+ # in the HLS specification
+ 'vidio',
+ 'https://www.vidio.com/videos/165683/playlist.m3u8',
+ [{
+ 'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8',
+ 'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
+ 'ext': 'mp4',
+ 'format_id': '270p 3G',
+ 'protocol': 'm3u8',
+ 'tbr': 300,
+ 'width': 480,
+ 'height': 270,
+ }, {
+ 'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8',
+ 'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
+ 'ext': 'mp4',
+ 'format_id': '360p SD',
+ 'protocol': 'm3u8',
+ 'tbr': 600,
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8',
+ 'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
+ 'ext': 'mp4',
+ 'format_id': '720p HD',
+ 'protocol': 'm3u8',
+ 'tbr': 1200,
+ 'width': 1280,
+ 'height': 720,
+ }]
+ ),
+ (
+ # https://github.com/ytdl-org/youtube-dl/issues/18923
+ # https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa
+ 'ted_18923',
+ 'http://hls.ted.com/talks/31241.m3u8',
+ [{
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '600k-Audio',
+ 'vcodec': 'none',
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '68',
+ 'vcodec': 'none',
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '163',
+ 'acodec': 'none',
+ 'width': 320,
+ 'height': 180,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '481',
+ 'acodec': 'none',
+ 'width': 512,
+ 'height': 288,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '769',
+ 'acodec': 'none',
+ 'width': 512,
+ 'height': 288,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '984',
+ 'acodec': 'none',
+ 'width': 512,
+ 'height': 288,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '1255',
+ 'acodec': 'none',
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '1693',
+ 'acodec': 'none',
+ 'width': 853,
+ 'height': 480,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '2462',
+ 'acodec': 'none',
+ 'width': 1280,
+ 'height': 720,
+ }]
+ ),
+ ]
+
+ for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
+ with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
+ mode='r', encoding='utf-8') as f:
+ formats = self.ie._parse_m3u8_formats(
+ f.read(), m3u8_url, ext='mp4')
+ self.ie._sort_formats(formats)
+ expect_value(self, formats, expected_formats, None)
+
+ def test_parse_mpd_formats(self):
+ _TEST_CASES = [
+ (
+ # https://github.com/ytdl-org/youtube-dl/issues/13919
+ # Also tests duplicate representation ids, see
+ # https://github.com/ytdl-org/youtube-dl/issues/15111
+ 'float_duration',
+ 'http://unknown/manifest.mpd', # mpd_url
+ None, # mpd_base_url
+ [{
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'm4a',
+ 'format_id': '318597',
+ 'format_note': 'DASH audio',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'none',
+ 'tbr': 61.587,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '318597',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.42001f',
+ 'tbr': 318.597,
+ 'width': 340,
+ 'height': 192,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '638590',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.42001f',
+ 'tbr': 638.59,
+ 'width': 512,
+ 'height': 288,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '1022565',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d001f',
+ 'tbr': 1022.565,
+ 'width': 688,
+ 'height': 384,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '2046506',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d001f',
+ 'tbr': 2046.506,
+ 'width': 1024,
+ 'height': 576,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '3998017',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.640029',
+ 'tbr': 3998.017,
+ 'width': 1280,
+ 'height': 720,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '5997485',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.640032',
+ 'tbr': 5997.485,
+ 'width': 1920,
+ 'height': 1080,
+ }]
+ ), (
+ # https://github.com/ytdl-org/youtube-dl/pull/14844
+ 'urls_only',
+ 'http://unknown/manifest.mpd', # mpd_url
+ None, # mpd_base_url
+ [{
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'h264_aac_144p_m4s',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc3.42c01e',
+ 'tbr': 200,
+ 'width': 256,
+ 'height': 144,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'h264_aac_240p_m4s',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc3.42c01e',
+ 'tbr': 400,
+ 'width': 424,
+ 'height': 240,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'h264_aac_360p_m4s',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc3.42c01e',
+ 'tbr': 800,
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'h264_aac_480p_m4s',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc3.42c01e',
+ 'tbr': 1200,
+ 'width': 856,
+ 'height': 480,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'h264_aac_576p_m4s',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc3.42c01e',
+ 'tbr': 1600,
+ 'width': 1024,
+ 'height': 576,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'h264_aac_720p_m4s',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc3.42c01e',
+ 'tbr': 2400,
+ 'width': 1280,
+ 'height': 720,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'h264_aac_1080p_m4s',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'avc3.42c01e',
+ 'tbr': 4400,
+ 'width': 1920,
+ 'height': 1080,
+ }]
+ ), (
+ # https://github.com/ytdl-org/youtube-dl/issues/20346
+ # Media considered unfragmented even though it contains
+ # an Initialization tag
+ 'unfragmented',
+ 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', # mpd_url
+ 'https://v.redd.it/hw1x7rcg7zl21', # mpd_base_url
+ [{
+ 'url': 'https://v.redd.it/hw1x7rcg7zl21/audio',
+ 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
+ 'ext': 'm4a',
+ 'format_id': 'AUDIO-1',
+ 'format_note': 'DASH audio',
+ 'container': 'm4a_dash',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'none',
+ 'tbr': 129.87,
+ 'asr': 48000,
+
+ }, {
+ 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_240',
+ 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'VIDEO-2',
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d401e',
+ 'tbr': 608.0,
+ 'width': 240,
+ 'height': 240,
+ 'fps': 30,
+ }, {
+ 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_360',
+ 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'VIDEO-1',
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d401e',
+ 'tbr': 804.261,
+ 'width': 360,
+ 'height': 360,
+ 'fps': 30,
+ }]
+ )
+ ]
+
+ for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
+ with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
+ mode='r', encoding='utf-8') as f:
+ formats = self.ie._parse_mpd_formats(
+ compat_etree_fromstring(f.read().encode('utf-8')),
+ mpd_base_url=mpd_base_url, mpd_url=mpd_url)
+ self.ie._sort_formats(formats)
+ expect_value(self, formats, expected_formats, None)
+
+ def test_parse_f4m_formats(self):
+ _TEST_CASES = [
+ (
+ # https://github.com/ytdl-org/youtube-dl/issues/14660
+ 'custom_base_url',
+ 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
+ [{
+ 'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
+ 'ext': 'flv',
+ 'format_id': '2148',
+ 'protocol': 'f4m',
+ 'tbr': 2148,
+ 'width': 1280,
+ 'height': 720,
+ }]
+ ),
+ ]
+
+ for f4m_file, f4m_url, expected_formats in _TEST_CASES:
+ with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
+ mode='r', encoding='utf-8') as f:
+ formats = self.ie._parse_f4m_formats(
+ compat_etree_fromstring(f.read().encode('utf-8')),
+ f4m_url, None)
+ self.ie._sort_formats(formats)
+ expect_value(self, formats, expected_formats, None)
+
+ def test_parse_xspf(self):
+ _TEST_CASES = [
+ (
+ 'foo_xspf',
+ 'https://example.org/src/foo_xspf.xspf',
+ [{
+ 'id': 'foo_xspf',
+ 'title': 'Pandemonium',
+ 'description': 'Visit http://bigbrother404.bandcamp.com',
+ 'duration': 202.416,
+ 'formats': [{
+ 'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+ 'url': 'https://example.org/src/cd1/track%201.mp3',
+ }],
+ }, {
+ 'id': 'foo_xspf',
+ 'title': 'Final Cartridge (Nichico Twelve Remix)',
+ 'description': 'Visit http://bigbrother404.bandcamp.com',
+ 'duration': 255.857,
+ 'formats': [{
+ 'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+ 'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3',
+ }],
+ }, {
+ 'id': 'foo_xspf',
+ 'title': 'Rebuilding Nightingale',
+ 'description': 'Visit http://bigbrother404.bandcamp.com',
+ 'duration': 287.915,
+ 'formats': [{
+ 'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+ 'url': 'https://example.org/src/track3.mp3',
+ }, {
+ 'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+ 'url': 'https://example.com/track3.mp3',
+ }]
+ }]
+ ),
+ ]
+
+ for xspf_file, xspf_url, expected_entries in _TEST_CASES:
+ with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
+ mode='r', encoding='utf-8') as f:
+ entries = self.ie._parse_xspf(
+ compat_etree_fromstring(f.read().encode('utf-8')),
+ xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
+ expect_value(self, entries, expected_entries, None)
+ for i in range(len(entries)):
+ expect_dict(self, entries[i], expected_entries[i])
+
+ def test_response_with_expected_status_returns_content(self):
+ # Checks for mitigations against the effects of
+ # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
+ # manifest as `_download_webpage`, `_download_xml`, `_download_json`,
+ # or the underlying `_download_webpage_handle` returning no content
+ # when a response matches `expected_status`.
+
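+ # The test server below answers /teapot with TEAPOT_RESPONSE_STATUS and
+ # TEAPOT_RESPONSE_BODY (defined earlier in this file; 418 judging by the
+ # name); passing expected_status makes the download helpers return that
+ # body instead of raising on the error status.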
+ httpd = compat_http_server.HTTPServer(
+ ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
+ port = http_server_port(httpd)
+ server_thread = threading.Thread(target=httpd.serve_forever)
+ server_thread.daemon = True
+ server_thread.start()
+
+ (content, urlh) = self.ie._download_webpage_handle(
+ 'http://127.0.0.1:%d/teapot' % port, None,
+ expected_status=TEAPOT_RESPONSE_STATUS)
+ self.assertEqual(content, TEAPOT_RESPONSE_BODY)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
new file mode 100644
index 0000000..e48befd
--- /dev/null
+++ b/test/test_YoutubeDL.py
@@ -0,0 +1,1002 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import copy
+
+from test.helper import FakeYDL, assertRegexpMatches
+from hypervideo_dl import YoutubeDL
+from hypervideo_dl.compat import compat_str, compat_urllib_error
+from hypervideo_dl.extractor import YoutubeIE
+from hypervideo_dl.extractor.common import InfoExtractor
+from hypervideo_dl.postprocessor.common import PostProcessor
+from hypervideo_dl.utils import ExtractorError, match_filter_func
+
+TEST_URL = 'http://localhost/sample.mp4'
+
+
+class YDL(FakeYDL):
+ def __init__(self, *args, **kwargs):
+ super(YDL, self).__init__(*args, **kwargs)
+ self.downloaded_info_dicts = []
+ self.msgs = []
+
+ def process_info(self, info_dict):
+ self.downloaded_info_dicts.append(info_dict)
+
+ def to_screen(self, msg):
+ self.msgs.append(msg)
+
+
+def _make_result(formats, **kwargs):
+ res = {
+ 'formats': formats,
+ 'id': 'testid',
+ 'title': 'testttitle',
+ 'extractor': 'testex',
+ 'extractor_key': 'TestEx',
+ }
+ res.update(**kwargs)
+ return res
+
+
+class TestFormatSelection(unittest.TestCase):
+ def test_prefer_free_formats(self):
+ # Same resolution => download webm
+ ydl = YDL()
+ ydl.params['prefer_free_formats'] = True
+ formats = [
+ {'ext': 'webm', 'height': 460, 'url': TEST_URL},
+ {'ext': 'mp4', 'height': 460, 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+ yie = YoutubeIE(ydl)
+ yie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['ext'], 'webm')
+
+ # Different resolution => download best quality (mp4)
+ ydl = YDL()
+ ydl.params['prefer_free_formats'] = True
+ formats = [
+ {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+ {'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
+ ]
+ info_dict['formats'] = formats
+ yie = YoutubeIE(ydl)
+ yie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['ext'], 'mp4')
+
+ # No prefer_free_formats => prefer mp4 and flv for greater compatibility
+ ydl = YDL()
+ ydl.params['prefer_free_formats'] = False
+ formats = [
+ {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+ {'ext': 'mp4', 'height': 720, 'url': TEST_URL},
+ {'ext': 'flv', 'height': 720, 'url': TEST_URL},
+ ]
+ info_dict['formats'] = formats
+ yie = YoutubeIE(ydl)
+ yie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['ext'], 'mp4')
+
+ ydl = YDL()
+ ydl.params['prefer_free_formats'] = False
+ formats = [
+ {'ext': 'flv', 'height': 720, 'url': TEST_URL},
+ {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+ ]
+ info_dict['formats'] = formats
+ yie = YoutubeIE(ydl)
+ yie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['ext'], 'flv')
+
+ def test_format_selection(self):
+ formats = [
+ {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+ {'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL},
+ {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
+ {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
+ {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': '20/47'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], '47')
+
+ ydl = YDL({'format': '20/71/worst'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], '35')
+
+ ydl = YDL()
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], '2')
+
+ ydl = YDL({'format': 'webm/mp4'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], '47')
+
+ ydl = YDL({'format': '3gp/40/mp4'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], '35')
+
+ ydl = YDL({'format': 'example-with-dashes'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'example-with-dashes')
+
+ def test_format_selection_audio(self):
+ formats = [
+ {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'bestaudio'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'audio-high')
+
+ ydl = YDL({'format': 'worstaudio'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'audio-low')
+
+ formats = [
+ {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+ {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'bestaudio/worstaudio/best'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'vid-high')
+
+ def test_format_selection_audio_exts(self):
+ formats = [
+ {'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+ {'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+ {'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+ {'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
+ {'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
+ ]
+
+ info_dict = _make_result(formats)
+ ydl = YDL({'format': 'best'})
+ ie = YoutubeIE(ydl)
+ ie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(copy.deepcopy(info_dict))
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'aac-64')
+
+ ydl = YDL({'format': 'mp3'})
+ ie = YoutubeIE(ydl)
+ ie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(copy.deepcopy(info_dict))
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'mp3-64')
+
+ ydl = YDL({'prefer_free_formats': True})
+ ie = YoutubeIE(ydl)
+ ie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(copy.deepcopy(info_dict))
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'ogg-64')
+
+ def test_format_selection_video(self):
+ formats = [
+ {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL},
+ {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL},
+ {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'bestvideo'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'dash-video-high')
+
+ ydl = YDL({'format': 'worstvideo'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'dash-video-low')
+
+ ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'dash-video-low')
+
+ formats = [
+ {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'bestvideo[vcodec=avc1.123456]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
+
+ def test_format_selection_string_ops(self):
+ formats = [
+ {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},
+ {'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
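+ # A spec that is only a bracketed condition (no explicit selector)
+ # effectively means "best format satisfying the condition", so each
+ # filter below narrows the two formats down to a single expected one.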
+ # equals (=)
+ ydl = YDL({'format': '[format_id=abc-cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not equal (!=)
+ ydl = YDL({'format': '[format_id!=abc-cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ # starts with (^=)
+ ydl = YDL({'format': '[format_id^=abc]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not start with (!^=)
+ ydl = YDL({'format': '[format_id!^=abc]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ # ends with ($=)
+ ydl = YDL({'format': '[format_id$=cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not end with (!$=)
+ ydl = YDL({'format': '[format_id!$=cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ # contains (*=)
+ ydl = YDL({'format': '[format_id*=bc-cb]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not contain (!*=)
+ ydl = YDL({'format': '[format_id!*=bc-cb]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ ydl = YDL({'format': '[format_id!*=-]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ def test_youtube_format_selection(self):
+ order = [
+ '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
+ # Apple HTTP Live Streaming
+ '96', '95', '94', '93', '92', '132', '151',
+ # 3D
+ '85', '84', '102', '83', '101', '82', '100',
+ # Dash video
+ '137', '248', '136', '247', '135', '246',
+ '245', '244', '134', '243', '133', '242', '160',
+ # Dash audio
+ '141', '172', '140', '171', '139',
+ ]
+
+ def format_info(f_id):
+ info = YoutubeIE._formats[f_id].copy()
+
+ # XXX: In real cases InfoExtractor._parse_mpd_formats() fills in 'acodec'
+ # and 'vcodec', but in tests this information has been incomplete since
+ # commit a6c2c24479e5f4827ceb06f64d855329c0a6f593;
+ # test_YoutubeDL.test_youtube_format_selection breaks without this fix.
+ if 'acodec' in info and 'vcodec' not in info:
+ info['vcodec'] = 'none'
+ elif 'vcodec' in info and 'acodec' not in info:
+ info['acodec'] = 'none'
+
+ info['format_id'] = f_id
+ info['url'] = 'url:' + f_id
+ return info
+ formats_order = [format_info(f_id) for f_id in order]
+
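+ # '+' merges one video and one audio format into a single download whose
+ # format_id joins both ids (e.g. '137+141'); ',' instead requests
+ # several independent downloads.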
+ info_dict = _make_result(list(formats_order), extractor='youtube')
+ ydl = YDL({'format': 'bestvideo+bestaudio'})
+ yie = YoutubeIE(ydl)
+ yie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], '137+141')
+ self.assertEqual(downloaded['ext'], 'mp4')
+
+ info_dict = _make_result(list(formats_order), extractor='youtube')
+ ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'})
+ yie = YoutubeIE(ydl)
+ yie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], '38')
+
+ info_dict = _make_result(list(formats_order), extractor='youtube')
+ ydl = YDL({'format': 'bestvideo/best,bestaudio'})
+ yie = YoutubeIE(ydl)
+ yie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(info_dict)
+ downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+ self.assertEqual(downloaded_ids, ['137', '141'])
+
+ info_dict = _make_result(list(formats_order), extractor='youtube')
+ ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'})
+ yie = YoutubeIE(ydl)
+ yie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(info_dict)
+ downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+ self.assertEqual(downloaded_ids, ['137+141', '248+141'])
+
+ info_dict = _make_result(list(formats_order), extractor='youtube')
+ ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'})
+ yie = YoutubeIE(ydl)
+ yie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(info_dict)
+ downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+ self.assertEqual(downloaded_ids, ['136+141', '247+141'])
+
+ info_dict = _make_result(list(formats_order), extractor='youtube')
+ ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'})
+ yie = YoutubeIE(ydl)
+ yie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(info_dict)
+ downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+ self.assertEqual(downloaded_ids, ['248+141'])
+
+ for f1, f2 in zip(formats_order, formats_order[1:]):
+ info_dict = _make_result([f1, f2], extractor='youtube')
+ ydl = YDL({'format': 'best/bestvideo'})
+ yie = YoutubeIE(ydl)
+ yie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], f1['format_id'])
+
+ info_dict = _make_result([f2, f1], extractor='youtube')
+ ydl = YDL({'format': 'best/bestvideo'})
+ yie = YoutubeIE(ydl)
+ yie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], f1['format_id'])
+
+ def test_audio_only_extractor_format_selection(self):
+ # For extractors with incomplete formats (all formats are audio-only or
+ # video-only) best and worst should fallback to corresponding best/worst
+ # video-only or audio-only formats (as per
+ # https://github.com/ytdl-org/youtube-dl/pull/5556)
+ formats = [
+ {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'best'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'high')
+
+ ydl = YDL({'format': 'worst'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'low')
+
+ def test_format_not_available(self):
+ formats = [
+ {'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL},
+ {'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ # This must fail: the only complete (video+audio) format does not match
+ # the filter, and since the extractor does not provide exclusively
+ # incomplete formats (video-only or audio-only), 'best' will not fall
+ # back to them.
+ ydl = YDL({'format': 'best[height>360]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ def test_format_selection_issue_10083(self):
+ # See https://github.com/ytdl-org/youtube-dl/issues/10083
+ formats = [
+ {'format_id': 'regular', 'height': 360, 'url': TEST_URL},
+ {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
+ {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'})
+ ydl.process_ie_result(info_dict.copy())
+ self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio')
+
+ def test_invalid_format_specs(self):
+ def assert_syntax_error(format_spec):
+ ydl = YDL({'format': format_spec})
+ info_dict = _make_result([{'format_id': 'foo', 'url': TEST_URL}])
+ self.assertRaises(SyntaxError, ydl.process_ie_result, info_dict)
+
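+ # Each of these violates the spec grammar: an empty alternative (',,'),
+ # dangling '+' operands, a bare '/', and merging more than two formats.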
+ assert_syntax_error('bestvideo,,best')
+ assert_syntax_error('+bestaudio')
+ assert_syntax_error('bestvideo+')
+ assert_syntax_error('/')
+ assert_syntax_error('bestvideo+bestvideo+bestaudio')
+
+ def test_format_filtering(self):
+ formats = [
+ {'format_id': 'A', 'filesize': 500, 'width': 1000},
+ {'format_id': 'B', 'filesize': 1000, 'width': 500},
+ {'format_id': 'C', 'filesize': 1000, 'width': 400},
+ {'format_id': 'D', 'filesize': 2000, 'width': 600},
+ {'format_id': 'E', 'filesize': 3000},
+ {'format_id': 'F'},
+ {'format_id': 'G', 'filesize': 1000000},
+ ]
+ for f in formats:
+ f['url'] = 'http://_/'
+ f['ext'] = 'unknown'
+ info_dict = _make_result(formats)
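+ # Bracketed filters compare numeric fields; a '?' after the operator
+ # (as in 'best[filesize <= ? 3000]') also admits formats that lack the
+ # field entirely, which is how 'F' (no filesize) can be selected below.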
+
+ ydl = YDL({'format': 'best[filesize<3000]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'D')
+
+ ydl = YDL({'format': 'best[filesize<=3000]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'E')
+
+ ydl = YDL({'format': 'best[filesize <= ? 3000]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'F')
+
+ ydl = YDL({'format': 'best [filesize = 1000] [width>450]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'B')
+
+ ydl = YDL({'format': 'best [filesize = 1000] [width!=450]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'C')
+
+ ydl = YDL({'format': '[filesize>?1]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'G')
+
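+ # Size suffixes differ: '1M' parses as the decimal 1,000,000 bytes while
+ # '1MiB' is the binary 1,048,576, so only the MiB filter admits 'G'.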
+ ydl = YDL({'format': '[filesize<1M]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'E')
+
+ ydl = YDL({'format': '[filesize<1MiB]'})
+ ydl.process_ie_result(info_dict)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'G')
+
+ ydl = YDL({'format': 'all[width>=400][width<=600]'})
+ ydl.process_ie_result(info_dict)
+ downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+ self.assertEqual(downloaded_ids, ['B', 'C', 'D'])
+
+ ydl = YDL({'format': 'best[height<40]'})
+ try:
+ ydl.process_ie_result(info_dict)
+ except ExtractorError:
+ pass
+ self.assertEqual(ydl.downloaded_info_dicts, [])
+
+ def test_default_format_spec(self):
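+ # The default spec prefers merging ('bestvideo+bestaudio/best') and only
+ # drops to single-file 'best/bestvideo+bestaudio' when merging is
+ # impractical: live streams and output to stdout ('-'). With 'simulate'
+ # nothing is written, so the merge-first default applies even for live.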
+ ydl = YDL({'simulate': True})
+ self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
+
+ ydl = YDL({})
+ self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
+
+ ydl = YDL({'simulate': True})
+ self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo+bestaudio/best')
+
+ ydl = YDL({'outtmpl': '-'})
+ self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
+
+ ydl = YDL({})
+ self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
+ self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
+
+
+class TestYoutubeDL(unittest.TestCase):
+ def test_subtitles(self):
+ def s_formats(lang, autocaption=False):
+ return [{
+ 'ext': ext,
+ 'url': 'http://localhost/video.%s.%s' % (lang, ext),
+ '_auto': autocaption,
+ } for ext in ['vtt', 'srt', 'ass']]
+ subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es'])
+ auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es'])
+ info_dict = {
+ 'id': 'test',
+ 'title': 'Test',
+ 'url': 'http://localhost/video.mp4',
+ 'subtitles': subtitles,
+ 'automatic_captions': auto_captions,
+ 'extractor': 'TEST',
+ }
+
+ def get_info(params=None):
+ # avoid a mutable default argument; copy so repeated calls cannot
+ # leak state through a shared dict
+ params = dict(params or {})
+ params.setdefault('simulate', True)
+ ydl = YDL(params)
+ ydl.report_warning = lambda *args, **kargs: None
+ return ydl.process_video_result(info_dict, download=False)
+
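+ # With no subtitle options nothing is requested; 'writesubtitles' alone
+ # selects only the default language 'en', and under the default format
+ # preference the last listed extension ('ass') wins. 'foo/srt' below
+ # falls back to 'srt' because no 'foo' subtitles exist.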
+ result = get_info()
+ self.assertFalse(result.get('requested_subtitles'))
+ self.assertEqual(result['subtitles'], subtitles)
+ self.assertEqual(result['automatic_captions'], auto_captions)
+
+ result = get_info({'writesubtitles': True})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['en']))
+ self.assertTrue(subs['en'].get('data') is None)
+ self.assertEqual(subs['en']['ext'], 'ass')
+
+ result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'})
+ subs = result['requested_subtitles']
+ self.assertEqual(subs['en']['ext'], 'srt')
+
+ result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['es', 'fr']))
+
+ result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['es', 'pt']))
+ self.assertFalse(subs['es']['_auto'])
+ self.assertTrue(subs['pt']['_auto'])
+
+ result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['es', 'pt']))
+ self.assertTrue(subs['es']['_auto'])
+ self.assertTrue(subs['pt']['_auto'])
+
+ def test_add_extra_info(self):
+ test_dict = {
+ 'extractor': 'Foo',
+ }
+ extra_info = {
+ 'extractor': 'Bar',
+ 'playlist': 'funny videos',
+ }
+ YDL.add_extra_info(test_dict, extra_info)
+ self.assertEqual(test_dict['extractor'], 'Foo')
+ self.assertEqual(test_dict['playlist'], 'funny videos')
+
+ def test_prepare_filename(self):
+ info = {
+ 'id': '1234',
+ 'ext': 'mp4',
+ 'width': None,
+ 'height': 1080,
+ 'title1': '$PATH',
+ 'title2': '%PATH%',
+ }
+
+ def fname(templ, na_placeholder='NA'):
+ params = {'outtmpl': templ}
+ if na_placeholder != 'NA':
+ params['outtmpl_na_placeholder'] = na_placeholder
+ ydl = YoutubeDL(params)
+ return ydl.prepare_filename(info)
+ self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
+ self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
+ NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(id)s.%(ext)s'
+ # Replace missing fields with 'NA' by default
+ self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4')
+ # Or by provided placeholder
+ self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4')
+ self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4')
+ self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
+ self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4')
+ self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4')
+ self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4')
+ self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
+ self.assertEqual(fname('%(height)   06d.%(ext)s'), ' 01080.mp4')
+ self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
+ self.assertEqual(fname('%(height)0   6d.%(ext)s'), ' 01080.mp4')
+ self.assertEqual(fname('%(height)   0   6d.%(ext)s'), ' 01080.mp4')
+ self.assertEqual(fname('%%'), '%')
+ self.assertEqual(fname('%%%%'), '%%')
+ self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
+ self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
+ self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
+ self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
+ self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH')
+ self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%')
+
+ def test_format_note(self):
+ ydl = YoutubeDL()
+ self.assertEqual(ydl._format_note({}), '')
+ assertRegexpMatches(self, ydl._format_note({
+ 'vbr': 10,
+ }), r'^\s*10k$')
+ assertRegexpMatches(self, ydl._format_note({
+ 'fps': 30,
+ }), r'^30fps$')
+
+ def test_postprocessors(self):
+ filename = 'post-processor-testfile.mp4'
+ audiofile = filename + '.mp3'
+
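+ # PostProcessor.run() returns (files_to_delete, info): SimplePP creates
+ # the fake audio file and reports the original filepath as deletable, so
+ # 'keepvideo' alone decides whether the source file survives.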
+ class SimplePP(PostProcessor):
+ def run(self, info):
+ with open(audiofile, 'wt') as f:
+ f.write('EXAMPLE')
+ return [info['filepath']], info
+
+ def run_pp(params, PP):
+ with open(filename, 'wt') as f:
+ f.write('EXAMPLE')
+ ydl = YoutubeDL(params)
+ ydl.add_post_processor(PP())
+ ydl.post_process(filename, {'filepath': filename})
+
+ run_pp({'keepvideo': True}, SimplePP)
+ self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
+ self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
+ os.unlink(filename)
+ os.unlink(audiofile)
+
+ run_pp({'keepvideo': False}, SimplePP)
+ self.assertFalse(os.path.exists(filename), '%s exists' % filename)
+ self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
+ os.unlink(audiofile)
+
+ class ModifierPP(PostProcessor):
+ def run(self, info):
+ with open(info['filepath'], 'wt') as f:
+ f.write('MODIFIED')
+ return [], info
+
+ run_pp({'keepvideo': False}, ModifierPP)
+ self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
+ os.unlink(filename)
+
+ def test_match_filter(self):
+ class FilterYDL(YDL):
+ def __init__(self, *args, **kwargs):
+ super(FilterYDL, self).__init__(*args, **kwargs)
+ self.params['simulate'] = True
+
+ def process_info(self, info_dict):
+ super(YDL, self).process_info(info_dict)
+
+ def _match_entry(self, info_dict, incomplete):
+ res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
+ if res is None:
+ self.downloaded_info_dicts.append(info_dict)
+ return res
+
+ first = {
+ 'id': '1',
+ 'url': TEST_URL,
+ 'title': 'one',
+ 'extractor': 'TEST',
+ 'duration': 30,
+ 'filesize': 10 * 1024,
+ 'playlist_id': '42',
+ 'uploader': "變態妍字幕版 太妍 тест",
+ 'creator': "тест ' 123 ' тест--",
+ }
+ second = {
+ 'id': '2',
+ 'url': TEST_URL,
+ 'title': 'two',
+ 'extractor': 'TEST',
+ 'duration': 10,
+ 'description': 'foo',
+ 'filesize': 5 * 1024,
+ 'playlist_id': '43',
+ 'uploader': "тест 123",
+ }
+ videos = [first, second]
+
+ def get_videos(filter_=None):
+ ydl = FilterYDL({'match_filter': filter_})
+ for v in videos:
+ ydl.process_ie_result(v, download=True)
+ return [v['id'] for v in ydl.downloaded_info_dicts]
+
+ res = get_videos()
+ self.assertEqual(res, ['1', '2'])
+
+ def f(v):
+ if v['id'] == '1':
+ return None
+ else:
+ return 'Video id is not 1'
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
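+ # match_filter_func compiles a small query language: numeric comparisons
+ # ('duration < 30'), string equality ('description = foo'), '=?' which
+ # also matches when the field is absent, size suffixes ('5KiB'),
+ # (escaped) quoted values and '&' for conjunction.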
+ f = match_filter_func('duration < 30')
+ res = get_videos(f)
+ self.assertEqual(res, ['2'])
+
+ f = match_filter_func('description = foo')
+ res = get_videos(f)
+ self.assertEqual(res, ['2'])
+
+ f = match_filter_func('description =? foo')
+ res = get_videos(f)
+ self.assertEqual(res, ['1', '2'])
+
+ f = match_filter_func('filesize > 5KiB')
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
+ f = match_filter_func('playlist_id = 42')
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
+ f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"')
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
+ f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"')
+ res = get_videos(f)
+ self.assertEqual(res, ['2'])
+
+ f = match_filter_func('creator = "тест \' 123 \' тест--"')
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
+ f = match_filter_func("creator = 'тест \\' 123 \\' тест--'")
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
+ f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30")
+ res = get_videos(f)
+ self.assertEqual(res, [])
+
+ def test_playlist_items_selection(self):
+ entries = [{
+ 'id': compat_str(i),
+ 'title': compat_str(i),
+ 'url': TEST_URL,
+ } for i in range(1, 5)]
+ playlist = {
+ '_type': 'playlist',
+ 'id': 'test',
+ 'entries': entries,
+ 'extractor': 'test:playlist',
+ 'extractor_key': 'test:playlist',
+ 'webpage_url': 'http://example.com',
+ }
+
+ def get_downloaded_info_dicts(params):
+ ydl = YDL(params)
+ # make a deep copy because the dictionary and nested entries
+ # can be modified
+ ydl.process_ie_result(copy.deepcopy(playlist))
+ return ydl.downloaded_info_dicts
+
+ def get_ids(params):
+ return [int(v['id']) for v in get_downloaded_info_dicts(params)]
+
+ result = get_ids({})
+ self.assertEqual(result, [1, 2, 3, 4])
+
+ result = get_ids({'playlistend': 10})
+ self.assertEqual(result, [1, 2, 3, 4])
+
+ result = get_ids({'playlistend': 2})
+ self.assertEqual(result, [1, 2])
+
+ result = get_ids({'playliststart': 10})
+ self.assertEqual(result, [])
+
+ result = get_ids({'playliststart': 2})
+ self.assertEqual(result, [2, 3, 4])
+
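+ # 'playlist_items' takes comma-separated indices and ranges; overlapping
+ # selections like '2-4,3-4,3' must yield each entry only once.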
+ result = get_ids({'playlist_items': '2-4'})
+ self.assertEqual(result, [2, 3, 4])
+
+ result = get_ids({'playlist_items': '2,4'})
+ self.assertEqual(result, [2, 4])
+
+ result = get_ids({'playlist_items': '10'})
+ self.assertEqual(result, [])
+
+ result = get_ids({'playlist_items': '3-10'})
+ self.assertEqual(result, [3, 4])
+
+ result = get_ids({'playlist_items': '2-4,3-4,3'})
+ self.assertEqual(result, [2, 3, 4])
+
+ # Tests for https://github.com/ytdl-org/youtube-dl/issues/10591
+ # @{
+ result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
+ self.assertEqual(result[0]['playlist_index'], 2)
+ self.assertEqual(result[1]['playlist_index'], 3)
+
+ result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
+ self.assertEqual(result[0]['playlist_index'], 2)
+ self.assertEqual(result[1]['playlist_index'], 3)
+ self.assertEqual(result[2]['playlist_index'], 4)
+
+ result = get_downloaded_info_dicts({'playlist_items': '4,2'})
+ self.assertEqual(result[0]['playlist_index'], 4)
+ self.assertEqual(result[1]['playlist_index'], 2)
+ # @}
+
+ def test_urlopen_no_file_protocol(self):
+ # see https://github.com/ytdl-org/youtube-dl/issues/8227
+ ydl = YDL()
+ self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd')
+
+ def test_do_not_override_ie_key_in_url_transparent(self):
+ ydl = YDL()
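+ # A 'url_transparent' result delegates extraction to another URL while
+ # overlaying its own metadata (here Foo1's title). The ie_key of the
+ # plain 'url' result returned by Foo2 must not be overridden, so the
+ # chain ends at Foo3, whose id and extractor fields win out.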
+
+ class Foo1IE(InfoExtractor):
+ _VALID_URL = r'foo1:'
+
+ def _real_extract(self, url):
+ return {
+ '_type': 'url_transparent',
+ 'url': 'foo2:',
+ 'ie_key': 'Foo2',
+ 'title': 'foo1 title',
+ 'id': 'foo1_id',
+ }
+
+ class Foo2IE(InfoExtractor):
+ _VALID_URL = r'foo2:'
+
+ def _real_extract(self, url):
+ return {
+ '_type': 'url',
+ 'url': 'foo3:',
+ 'ie_key': 'Foo3',
+ }
+
+ class Foo3IE(InfoExtractor):
+ _VALID_URL = r'foo3:'
+
+ def _real_extract(self, url):
+ return _make_result([{'url': TEST_URL}], title='foo3 title')
+
+ ydl.add_info_extractor(Foo1IE(ydl))
+ ydl.add_info_extractor(Foo2IE(ydl))
+ ydl.add_info_extractor(Foo3IE(ydl))
+ ydl.extract_info('foo1:')
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['url'], TEST_URL)
+ self.assertEqual(downloaded['title'], 'foo1 title')
+ self.assertEqual(downloaded['id'], 'testid')
+ self.assertEqual(downloaded['extractor'], 'testex')
+ self.assertEqual(downloaded['extractor_key'], 'TestEx')
+
+ # Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
+ def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
+
+ class _YDL(YDL):
+ def __init__(self, *args, **kwargs):
+ super(_YDL, self).__init__(*args, **kwargs)
+
+ def trouble(self, s, tb=None):
+ pass
+
+ ydl = _YDL({
+ 'format': 'extra',
+ 'ignoreerrors': True,
+ })
+
+ class VideoIE(InfoExtractor):
+ _VALID_URL = r'video:(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ formats = [{
+ 'format_id': 'default',
+ 'url': 'url:',
+ }]
+ if video_id == '0':
+ raise ExtractorError('foo')
+ if video_id == '2':
+ formats.append({
+ 'format_id': 'extra',
+ 'url': TEST_URL,
+ })
+ return {
+ 'id': video_id,
+ 'title': 'Video %s' % video_id,
+ 'formats': formats,
+ }
+
+ class PlaylistIE(InfoExtractor):
+ _VALID_URL = r'playlist:'
+
+ def _entries(self):
+ for n in range(3):
+ video_id = compat_str(n)
+ yield {
+ '_type': 'url_transparent',
+ 'ie_key': VideoIE.ie_key(),
+ 'id': video_id,
+ 'url': 'video:%s' % video_id,
+ 'title': 'Video Transparent %s' % video_id,
+ }
+
+ def _real_extract(self, url):
+ return self.playlist_result(self._entries())
+
+ ydl.add_info_extractor(VideoIE(ydl))
+ ydl.add_info_extractor(PlaylistIE(ydl))
+ info = ydl.extract_info('playlist:')
+ entries = info['entries']
+ self.assertEqual(len(entries), 3)
+ self.assertTrue(entries[0] is None)
+ self.assertTrue(entries[1] is None)
+ self.assertEqual(len(ydl.downloaded_info_dicts), 1)
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(entries[2], downloaded)
+ self.assertEqual(downloaded['url'], TEST_URL)
+ self.assertEqual(downloaded['title'], 'Video Transparent 2')
+ self.assertEqual(downloaded['id'], '2')
+ self.assertEqual(downloaded['extractor'], 'Video')
+ self.assertEqual(downloaded['extractor_key'], 'Video')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py
new file mode 100644
index 0000000..eff9b16
--- /dev/null
+++ b/test/test_YoutubeDLCookieJar.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import os
+import re
+import sys
+import tempfile
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from hypervideo_dl.utils import YoutubeDLCookieJar
+
+
+class TestYoutubeDLCookieJar(unittest.TestCase):
+ def test_keep_session_cookies(self):
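+ # Session cookies carry no expiry timestamp and a standard
+ # MozillaCookieJar discards them on save; YoutubeDLCookieJar is expected
+ # to keep them (serialised with expiry 0, as asserted below).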
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
+ cookiejar.load(ignore_discard=True, ignore_expires=True)
+ tf = tempfile.NamedTemporaryFile(delete=False)
+ try:
+ cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
+ temp = tf.read().decode('utf-8')
+ self.assertTrue(re.search(
+ r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
+ self.assertTrue(re.search(
+ r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp))
+ finally:
+ tf.close()
+ os.remove(tf.name)
+
+ def test_strip_httponly_prefix(self):
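+ # The Netscape cookie file format marks HttpOnly cookies by prefixing
+ # the domain with '#HttpOnly_'; the jar should strip that prefix rather
+ # than skip the line as a comment.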
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
+ cookiejar.load(ignore_discard=True, ignore_expires=True)
+
+ def assert_cookie_has_value(key):
+ self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')
+
+ assert_cookie_has_value('HTTPONLY_COOKIE')
+ assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
+
+ def test_malformed_cookies(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
+ cookiejar.load(ignore_discard=True, ignore_expires=True)
+ # Cookies should be empty since all malformed cookie file entries
+ # will be ignored
+ self.assertFalse(cookiejar._cookies)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_aes.py b/test/test_aes.py
new file mode 100644
index 0000000..444c65e
--- /dev/null
+++ b/test/test_aes.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from hypervideo_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
+from hypervideo_dl.utils import bytes_to_intlist, intlist_to_bytes
+import base64
+
+# The encrypted data can be generated with 'devscripts/generate_aes_testdata.py'
+
+
+class TestAES(unittest.TestCase):
+ def setUp(self):
+ self.key = self.iv = [0x20, 0x15] + 14 * [0]
+ self.secret_msg = b'Secret message goes here'
+
+ def test_encrypt(self):
+ msg = b'message'
+ key = list(range(16))
+ encrypted = aes_encrypt(bytes_to_intlist(msg), key)
+ decrypted = intlist_to_bytes(aes_decrypt(encrypted, key))
+ self.assertEqual(decrypted, msg)
+
+ def test_cbc_decrypt(self):
+ data = bytes_to_intlist(
+ b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
+ )
+ decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
+ self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+
+ def test_cbc_encrypt(self):
+ data = bytes_to_intlist(self.secret_msg)
+ encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv))
+ self.assertEqual(
+ encrypted,
+ b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd")
+
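+ # aes_decrypt_text takes base64(nonce + ciphertext) plus a password from
+ # which the AES key is derived; the final argument is the key size in
+ # bytes (16 -> AES-128, 32 -> AES-256). This reading is inferred from
+ # the calls below rather than stated in the module.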
+ def test_decrypt_text(self):
+ password = intlist_to_bytes(self.key).decode('utf-8')
+ encrypted = base64.b64encode(
+ intlist_to_bytes(self.iv[:8])
+ + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
+ ).decode('utf-8')
+ decrypted = aes_decrypt_text(encrypted, password, 16)
+ self.assertEqual(decrypted, self.secret_msg)
+
+ password = intlist_to_bytes(self.key).decode('utf-8')
+ encrypted = base64.b64encode(
+ intlist_to_bytes(self.iv[:8])
+ + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
+ ).decode('utf-8')
+ decrypted = aes_decrypt_text(encrypted, password, 32)
+ self.assertEqual(decrypted, self.secret_msg)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py
new file mode 100644
index 0000000..5d1a8f2
--- /dev/null
+++ b/test/test_age_restriction.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import try_rm
+
+
+from hypervideo_dl import YoutubeDL
+
+
+def _download_restricted(url, filename, age):
+ """ Returns true if the file has been downloaded """
+
+ params = {
+ 'age_limit': age,
+ 'skip_download': True,
+ 'writeinfojson': True,
+ 'outtmpl': '%(id)s.%(ext)s',
+ }
+ ydl = YoutubeDL(params)
+ ydl.add_default_info_extractors()
+ json_filename = os.path.splitext(filename)[0] + '.info.json'
+ try_rm(json_filename)
+ ydl.download([url])
+ res = os.path.exists(json_filename)
+ try_rm(json_filename)
+ return res
+
+
+class TestAgeRestriction(unittest.TestCase):
+ def _assert_restricted(self, url, filename, age, old_age=None):
+ self.assertTrue(_download_restricted(url, filename, old_age))
+ self.assertFalse(_download_restricted(url, filename, age))
+
+ def test_youtube(self):
+ self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10)
+
+ def test_youporn(self):
+ self._assert_restricted(
+ 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+ '505835.mp4', 2, old_age=25)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
new file mode 100644
index 0000000..3f6ba11
--- /dev/null
+++ b/test/test_all_urls.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+import collections
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from test.helper import gettestcases
+
+from hypervideo_dl.extractor import (
+ FacebookIE,
+ gen_extractors,
+ YoutubeIE,
+)
+
+
+class TestAllURLsMatching(unittest.TestCase):
+ def setUp(self):
+ self.ies = gen_extractors()
+
+ def matching_ies(self, url):
+ return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic']
+
+ def assertMatch(self, url, ie_list):
+ self.assertEqual(self.matching_ies(url), ie_list)
+
+ def test_youtube_playlist_matching(self):
+ assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
+ assertTab = lambda url: self.assertMatch(url, ['youtube:tab'])
+ assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+ assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
+ assertPlaylist('PL63F0C78739B09958')
+ assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
+ assertTab('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+ assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
+ assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
+ self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
+ # Top tracks
+ assertTab('https://www.youtube.com/playlist?list=MCUS.20142101')
+
+ def test_youtube_matching(self):
+ self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
+ self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668
+ self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
+ self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
+ self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
+ self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
+
+ def test_youtube_channel_matching(self):
+ assertChannel = lambda url: self.assertMatch(url, ['youtube:tab'])
+ assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
+ assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
+ assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
+
+ def test_youtube_user_matching(self):
+ self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
+
+ def test_youtube_feeds(self):
+ self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab'])
+ self.assertMatch('https://www.youtube.com/feed/history', ['youtube:tab'])
+ self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
+ self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
+
+ # def test_youtube_search_matching(self):
+ # self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
+ # self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
+
+ def test_facebook_matching(self):
+ self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+ self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
+
+ def test_no_duplicates(self):
+ ies = gen_extractors()
+ for tc in gettestcases(include_onlymatching=True):
+ url = tc['url']
+ for ie in ies:
+ if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
+ self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
+ else:
+ self.assertFalse(
+ ie.suitable(url),
+ '%s should not match URL %r . That URL belongs to %s.' % (type(ie).__name__, url, tc['name']))
+
+ def test_keywords(self):
+ self.assertMatch(':ytsubs', ['youtube:subscriptions'])
+ self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
+ self.assertMatch(':ythistory', ['youtube:history'])
+
+ def test_vimeo_matching(self):
+ self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
+ self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel'])
+ self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo'])
+ self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user'])
+ self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
+ self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
+
+ # https://github.com/ytdl-org/youtube-dl/issues/1930
+ def test_soundcloud_not_matching_sets(self):
+ self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])
+
+ def test_tumblr(self):
+ self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
+ self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
+
+ def test_pbs(self):
+ # https://github.com/ytdl-org/youtube-dl/issues/2350
+ self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs'])
+ self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs'])
+
+ def test_no_duplicated_ie_names(self):
+ name_accu = collections.defaultdict(list)
+ for ie in self.ies:
+ name_accu[ie.IE_NAME.lower()].append(type(ie).__name__)
+ for (ie_name, ie_list) in name_accu.items():
+ self.assertEqual(
+ len(ie_list), 1,
+ 'Multiple extractors with the same IE_NAME "%s" (%s)' % (ie_name, ', '.join(ie_list)))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_cache.py b/test/test_cache.py
new file mode 100644
index 0000000..c7a88f9
--- /dev/null
+++ b/test/test_cache.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import shutil
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from test.helper import FakeYDL
+from hypervideo_dl.cache import Cache
+
+
+def _is_empty(d):
+ return not bool(os.listdir(d))
+
+
+def _mkdir(d):
+ if not os.path.exists(d):
+ os.mkdir(d)
+
+
+class TestCache(unittest.TestCase):
+ def setUp(self):
+ TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+ TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
+ _mkdir(TESTDATA_DIR)
+ self.test_dir = os.path.join(TESTDATA_DIR, 'cache_test')
+ self.tearDown()
+
+ def tearDown(self):
+ if os.path.exists(self.test_dir):
+ shutil.rmtree(self.test_dir)
+
+ def test_cache(self):
+ ydl = FakeYDL({
+ 'cachedir': self.test_dir,
+ })
+ c = Cache(ydl)
+ obj = {'x': 1, 'y': ['ä', '\\a', True]}
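+ # The cache round-trips JSON-serialisable values addressed by
+ # (section, key); load() returns None for anything missing and remove()
+ # wipes the whole cache directory, so the non-ASCII and escape values
+ # above must come back unchanged.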
+ self.assertEqual(c.load('test_cache', 'k.'), None)
+ c.store('test_cache', 'k.', obj)
+ self.assertEqual(c.load('test_cache', 'k2'), None)
+ self.assertFalse(_is_empty(self.test_dir))
+ self.assertEqual(c.load('test_cache', 'k.'), obj)
+ self.assertEqual(c.load('test_cache', 'y'), None)
+ self.assertEqual(c.load('test_cache2', 'k.'), None)
+ c.remove()
+ self.assertFalse(os.path.exists(self.test_dir))
+ self.assertEqual(c.load('test_cache', 'k.'), None)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_compat.py b/test/test_compat.py
new file mode 100644
index 0000000..c68d7fa
--- /dev/null
+++ b/test/test_compat.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from hypervideo_dl.compat import (
+ compat_getenv,
+ compat_setenv,
+ compat_etree_Element,
+ compat_etree_fromstring,
+ compat_expanduser,
+ compat_shlex_split,
+ compat_str,
+ compat_struct_unpack,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_unquote_plus,
+ compat_urllib_parse_urlencode,
+)
+
+
+class TestCompat(unittest.TestCase):
+ def test_compat_getenv(self):
+ test_str = 'тест'
+ compat_setenv('YOUTUBE_DL_COMPAT_GETENV', test_str)
+ self.assertEqual(compat_getenv('YOUTUBE_DL_COMPAT_GETENV'), test_str)
+
+ def test_compat_setenv(self):
+ test_var = 'YOUTUBE_DL_COMPAT_SETENV'
+ test_str = 'тест'
+ compat_setenv(test_var, test_str)
+ self.assertEqual(compat_getenv(test_var), test_str)
+
+ def test_compat_expanduser(self):
+ old_home = os.environ.get('HOME')
+ test_str = r'C:\Documents and Settings\тест\Application Data'
+ compat_setenv('HOME', test_str)
+ self.assertEqual(compat_expanduser('~'), test_str)
+ compat_setenv('HOME', old_home or '')
+
+ def test_all_present(self):
+ import hypervideo_dl.compat
+ all_names = hypervideo_dl.compat.__all__
+ present_names = set(filter(
+ lambda c: '_' in c and not c.startswith('_'),
+ dir(hypervideo_dl.compat))) - set(['unicode_literals'])
+ self.assertEqual(all_names, sorted(present_names))
+
+ def test_compat_urllib_parse_unquote(self):
+ self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
+ self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
+ self.assertEqual(compat_urllib_parse_unquote(''), '')
+ self.assertEqual(compat_urllib_parse_unquote('%'), '%')
+ self.assertEqual(compat_urllib_parse_unquote('%%'), '%%')
+ self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%')
+ self.assertEqual(compat_urllib_parse_unquote('%2F'), '/')
+ self.assertEqual(compat_urllib_parse_unquote('%2f'), '/')
+ self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波')
+ self.assertEqual(
+ compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
+%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''),
+ '''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
+%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
+ self.assertEqual(
+ compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''),
+ '''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''')
+
+ def test_compat_urllib_parse_unquote_plus(self):
+ self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def')
+ self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def')
+
+ def test_compat_urllib_parse_urlencode(self):
+ self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def')
+ self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def')
+ self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def')
+ self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def')
+ self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def')
+ self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def')
+ self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def')
+ self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def')
+
+ def test_compat_shlex_split(self):
+ self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
+ self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
+ self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
+
+ def test_compat_etree_Element(self):
+ try:
+ compat_etree_Element.items
+ except AttributeError:
+ self.fail('compat_etree_Element is not a type')
+
+ def test_compat_etree_fromstring(self):
+ xml = '''
+ <root foo="bar" spam="中文">
+ <normal>foo</normal>
+ <chinese>中文</chinese>
+ <foo><bar>spam</bar></foo>
+ </root>
+ '''
+ doc = compat_etree_fromstring(xml.encode('utf-8'))
+ self.assertTrue(isinstance(doc.attrib['foo'], compat_str))
+ self.assertTrue(isinstance(doc.attrib['spam'], compat_str))
+ self.assertTrue(isinstance(doc.find('normal').text, compat_str))
+ self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
+ self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
+
+ def test_compat_etree_fromstring_doctype(self):
+ xml = '''<?xml version="1.0"?>
+<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
+<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
+ compat_etree_fromstring(xml)
+
+ def test_struct_unpack(self):
+ self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_download.py b/test/test_download.py
new file mode 100644
index 0000000..a47369e
--- /dev/null
+++ b/test/test_download.py
@@ -0,0 +1,265 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import (
+ assertGreaterEqual,
+ expect_warnings,
+ get_params,
+ gettestcases,
+ expect_info_dict,
+ try_rm,
+ report_warning,
+)
+
+
+import hashlib
+import io
+import json
+import socket
+
+import hypervideo_dl.YoutubeDL
+from hypervideo_dl.compat import (
+ compat_http_client,
+ compat_urllib_error,
+ compat_HTTPError,
+)
+from hypervideo_dl.utils import (
+ DownloadError,
+ ExtractorError,
+ format_bytes,
+ UnavailableVideoError,
+)
+from hypervideo_dl.extractor import get_info_extractor
+
+RETRIES = 3
+
+
+class YoutubeDL(hypervideo_dl.YoutubeDL):
+ def __init__(self, *args, **kwargs):
+ self.to_stderr = self.to_screen
+ self.processed_info_dicts = []
+ super(YoutubeDL, self).__init__(*args, **kwargs)
+
+ def report_warning(self, message):
+ # Don't accept warnings during tests
+ raise ExtractorError(message)
+
+ def process_info(self, info_dict):
+ self.processed_info_dicts.append(info_dict)
+ return super(YoutubeDL, self).process_info(info_dict)
+
+
+def _file_md5(fn):
+ with open(fn, 'rb') as f:
+ return hashlib.md5(f.read()).hexdigest()
+
+
+defs = gettestcases()
+
+
+class TestDownload(unittest.TestCase):
+ # Parallel testing in nosetests. See
+ # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html
+ _multiprocess_shared_ = True
+
+ maxDiff = None
+
+ def __str__(self):
+ """Identify each test with the `add_ie` attribute, if available."""
+
+ def strclass(cls):
+ """From 2.7's unittest; 2.6 had _strclass so we can't import it."""
+ return '%s.%s' % (cls.__module__, cls.__name__)
+
+ add_ie = getattr(self, self._testMethodName).add_ie
+ return '%s (%s)%s:' % (self._testMethodName,
+ strclass(self.__class__),
+ ' [%s]' % add_ie if add_ie else '')
+
+ def setUp(self):
+ self.defs = defs
+
+# Dynamically generate tests
+
+
+def generator(test_case, tname):
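+    # Builds one test method from a single extractor test-case definition;
+    # `tname` also prefixes the output template so every generated test
+    # writes its downloads to distinct files.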
+
+ def test_template(self):
+ ie = hypervideo_dl.extractor.get_info_extractor(test_case['name'])()
+ other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])]
+ is_playlist = any(k.startswith('playlist') for k in test_case)
+ test_cases = test_case.get(
+ 'playlist', [] if is_playlist else [test_case])
+
+ def print_skipping(reason):
+ print('Skipping %s: %s' % (test_case['name'], reason))
+ if not ie.working():
+ print_skipping('IE marked as not _WORKING')
+ return
+
+ for tc in test_cases:
+ info_dict = tc.get('info_dict', {})
+ if not (info_dict.get('id') and info_dict.get('ext')):
+ raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
+
+ if 'skip' in test_case:
+ print_skipping(test_case['skip'])
+ return
+ for other_ie in other_ies:
+ if not other_ie.working():
+ print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
+ return
+
+ params = get_params(test_case.get('params', {}))
+ params['outtmpl'] = tname + '_' + params['outtmpl']
+ if is_playlist and 'playlist' not in test_case:
+ params.setdefault('extract_flat', 'in_playlist')
+ params.setdefault('skip_download', True)
+
+ ydl = YoutubeDL(params, auto_init=False)
+ ydl.add_default_info_extractors()
+ finished_hook_called = set()
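+        # Filenames the progress hook reported as 'finished'; used below to
+        # assert that each expected file completed downloading.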
+
+ def _hook(status):
+ if status['status'] == 'finished':
+ finished_hook_called.add(status['filename'])
+ ydl.add_progress_hook(_hook)
+ expect_warnings(ydl, test_case.get('expected_warnings', []))
+
+ def get_tc_filename(tc):
+ return ydl.prepare_filename(tc.get('info_dict', {}))
+
+ res_dict = None
+
+ def try_rm_tcs_files(tcs=None):
+ if tcs is None:
+ tcs = test_cases
+ for tc in tcs:
+ tc_filename = get_tc_filename(tc)
+ try_rm(tc_filename)
+ try_rm(tc_filename + '.part')
+ try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
+ try_rm_tcs_files()
+ try:
+ try_num = 1
+ while True:
+ try:
+ # We're not using .download here since that is just a shim
+ # for outside error handling, and returns the exit code
+ # instead of the result dict.
+ res_dict = ydl.extract_info(
+ test_case['url'],
+ force_generic_extractor=params.get('force_generic_extractor', False))
+                except (DownloadError, ExtractorError) as err:
+                    # Only plain network errors are retried below; anything
+                    # else (including an HTTP 503) is re-raised immediately.
+                    if (err.exc_info[0] not in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine)
+                            or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503)):
+                        raise
+
+ if try_num == RETRIES:
+ report_warning('%s failed due to network errors, skipping...' % tname)
+ return
+
+ print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
+
+ try_num += 1
+ else:
+ break
+
+ if is_playlist:
+ self.assertTrue(res_dict['_type'] in ['playlist', 'multi_video'])
+ self.assertTrue('entries' in res_dict)
+ expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
+
+ if 'playlist_mincount' in test_case:
+ assertGreaterEqual(
+ self,
+ len(res_dict['entries']),
+ test_case['playlist_mincount'],
+ 'Expected at least %d in playlist %s, but got only %d' % (
+ test_case['playlist_mincount'], test_case['url'],
+ len(res_dict['entries'])))
+ if 'playlist_count' in test_case:
+ self.assertEqual(
+ len(res_dict['entries']),
+ test_case['playlist_count'],
+ 'Expected %d entries in playlist %s, but got %d.' % (
+ test_case['playlist_count'],
+ test_case['url'],
+ len(res_dict['entries']),
+ ))
+ if 'playlist_duration_sum' in test_case:
+ got_duration = sum(e['duration'] for e in res_dict['entries'])
+ self.assertEqual(
+ test_case['playlist_duration_sum'], got_duration)
+
+ # Generalize both playlists and single videos to unified format for
+ # simplicity
+ if 'entries' not in res_dict:
+ res_dict['entries'] = [res_dict]
+
+ for tc_num, tc in enumerate(test_cases):
+ tc_res_dict = res_dict['entries'][tc_num]
+ # First, check test cases' data against extracted data alone
+ expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
+ # Now, check downloaded file consistency
+ tc_filename = get_tc_filename(tc)
+ if not test_case.get('params', {}).get('skip_download', False):
+ self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
+ self.assertTrue(tc_filename in finished_hook_called)
+ expected_minsize = tc.get('file_minsize', 10000)
+ if expected_minsize is not None:
+ if params.get('test'):
+ expected_minsize = max(expected_minsize, 10000)
+ got_fsize = os.path.getsize(tc_filename)
+ assertGreaterEqual(
+ self, got_fsize, expected_minsize,
+ 'Expected %s to be at least %s, but it\'s only %s ' %
+ (tc_filename, format_bytes(expected_minsize),
+ format_bytes(got_fsize)))
+ if 'md5' in tc:
+ md5_for_file = _file_md5(tc_filename)
+ self.assertEqual(tc['md5'], md5_for_file)
+ # Finally, check test cases' data again but this time against
+ # extracted data from info JSON file written during processing
+ info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
+ self.assertTrue(
+ os.path.exists(info_json_fn),
+ 'Missing info file %s' % info_json_fn)
+ with io.open(info_json_fn, encoding='utf-8') as infof:
+ info_dict = json.load(infof)
+ expect_info_dict(self, info_dict, tc.get('info_dict', {}))
+ finally:
+ try_rm_tcs_files()
+ if is_playlist and res_dict is not None and res_dict.get('entries'):
+ # Remove all other files that may have been extracted if the
+ # extractor returns full results even with extract_flat
+ res_tcs = [{'info_dict': e} for e in res_dict['entries']]
+ try_rm_tcs_files(res_tcs)
+
+ return test_template
+
+
+# And add them to TestDownload
+for n, test_case in enumerate(defs):
+ tname = 'test_' + str(test_case['name'])
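+    # An extractor may define several test cases under the same name; suffix
+    # duplicates with _1, _2, ... so every generated method name is unique.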
+ i = 1
+ while hasattr(TestDownload, tname):
+ tname = 'test_%s_%d' % (test_case['name'], i)
+ i += 1
+ test_method = generator(test_case, tname)
+ test_method.__name__ = str(tname)
+ ie_list = test_case.get('add_ie')
+ test_method.add_ie = ie_list and ','.join(ie_list)
+ setattr(TestDownload, test_method.__name__, test_method)
+ del test_method
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
new file mode 100644
index 0000000..5296de8
--- /dev/null
+++ b/test/test_downloader_http.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+# coding: utf-8
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import re
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import http_server_port, try_rm
+from hypervideo_dl import YoutubeDL
+from hypervideo_dl.compat import compat_http_server
+from hypervideo_dl.downloader.http import HttpFD
+from hypervideo_dl.utils import encodeFilename
+import threading
+
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+TEST_SIZE = 10 * 1024
+
+
+class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ def log_message(self, format, *args):
+ pass
+
+ def send_content_range(self, total=None):
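+        # Mirror a 'Range: bytes=start-end' request header back as a
+        # Content-Range response header; returns the byte count to serve.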
+ range_header = self.headers.get('Range')
+ start = end = None
+ if range_header:
+ mobj = re.search(r'^bytes=(\d+)-(\d+)', range_header)
+ if mobj:
+ start = int(mobj.group(1))
+ end = int(mobj.group(2))
+ valid_range = start is not None and end is not None
+ if valid_range:
+ content_range = 'bytes %d-%d' % (start, end)
+ if total:
+ content_range += '/%d' % total
+ self.send_header('Content-Range', content_range)
+ return (end - start + 1) if valid_range else total
+
+ def serve(self, range=True, content_length=True):
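+        # The flags emulate servers that lack Range and/or Content-Length
+        # support so each HttpFD code path is exercised.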
+ self.send_response(200)
+ self.send_header('Content-Type', 'video/mp4')
+ size = TEST_SIZE
+ if range:
+ size = self.send_content_range(TEST_SIZE)
+ if content_length:
+ self.send_header('Content-Length', size)
+ self.end_headers()
+ self.wfile.write(b'#' * size)
+
+ def do_GET(self):
+ if self.path == '/regular':
+ self.serve()
+ elif self.path == '/no-content-length':
+ self.serve(content_length=False)
+ elif self.path == '/no-range':
+ self.serve(range=False)
+ elif self.path == '/no-range-no-content-length':
+ self.serve(range=False, content_length=False)
+ else:
+ assert False
+
+
+class FakeLogger(object):
+ def debug(self, msg):
+ pass
+
+ def warning(self, msg):
+ pass
+
+ def error(self, msg):
+ pass
+
+
+class TestHttpFD(unittest.TestCase):
+ def setUp(self):
+ self.httpd = compat_http_server.HTTPServer(
+ ('127.0.0.1', 0), HTTPTestRequestHandler)
+ self.port = http_server_port(self.httpd)
+ self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+ self.server_thread.daemon = True
+ self.server_thread.start()
+
+ def download(self, params, ep):
+ params['logger'] = FakeLogger()
+ ydl = YoutubeDL(params)
+ downloader = HttpFD(ydl, params)
+ filename = 'testfile.mp4'
+ try_rm(encodeFilename(filename))
+ self.assertTrue(downloader.real_download(filename, {
+ 'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
+ }))
+ self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
+ try_rm(encodeFilename(filename))
+
+ def download_all(self, params):
+ for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
+ self.download(params, ep)
+
+ def test_regular(self):
+ self.download_all({})
+
+ def test_chunked(self):
+ self.download_all({
+ 'http_chunk_size': 1000,
+ })
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_execution.py b/test/test_execution.py
new file mode 100644
index 0000000..f049551
--- /dev/null
+++ b/test/test_execution.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import unittest
+
+import sys
+import os
+import subprocess
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from hypervideo_dl.utils import encodeArgument
+
+rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+try:
+ _DEV_NULL = subprocess.DEVNULL
+except AttributeError:
+ _DEV_NULL = open(os.devnull, 'wb')
+
+
+class TestExecution(unittest.TestCase):
+ def test_import(self):
+ subprocess.check_call([sys.executable, '-c', 'import hypervideo_dl'], cwd=rootDir)
+
+ def test_module_exec(self):
+ if sys.version_info >= (2, 7): # Python 2.6 doesn't support package execution
+ subprocess.check_call([sys.executable, '-m', 'hypervideo_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+
+ def test_main_exec(self):
+ subprocess.check_call([sys.executable, 'hypervideo_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+
+ def test_cmdline_umlauts(self):
+ p = subprocess.Popen(
+ [sys.executable, 'hypervideo_dl/__main__.py', encodeArgument('ä'), '--version'],
+ cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
+ _, stderr = p.communicate()
+ self.assertFalse(stderr)
+
+ def test_lazy_extractors(self):
+ try:
+ subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'hypervideo_dl/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
+ subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
+ finally:
+ try:
+ os.remove('hypervideo_dl/extractor/lazy_extractors.py')
+ except (IOError, OSError):
+ pass
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_http.py b/test/test_http.py
new file mode 100644
index 0000000..6eaef81
--- /dev/null
+++ b/test/test_http.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+# coding: utf-8
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import http_server_port
+from hypervideo_dl import YoutubeDL
+from hypervideo_dl.compat import compat_http_server, compat_urllib_request
+import ssl
+import threading
+
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ def log_message(self, format, *args):
+ pass
+
+ def do_GET(self):
+ if self.path == '/video.html':
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.end_headers()
+ self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
+ elif self.path == '/vid.mp4':
+ self.send_response(200)
+ self.send_header('Content-Type', 'video/mp4')
+ self.end_headers()
+ self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
+ elif self.path == '/302':
+ if sys.version_info[0] == 3:
+ # XXX: Python 3 http server does not allow non-ASCII header values
+ self.send_response(404)
+ self.end_headers()
+ return
+
+ new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server)
+ self.send_response(302)
+ self.send_header(b'Location', new_url.encode('utf-8'))
+ self.end_headers()
+ elif self.path == '/%E4%B8%AD%E6%96%87.html':
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.end_headers()
+ self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
+ else:
+ assert False
+
+
+class FakeLogger(object):
+ def debug(self, msg):
+ pass
+
+ def warning(self, msg):
+ pass
+
+ def error(self, msg):
+ pass
+
+
+class TestHTTP(unittest.TestCase):
+ def setUp(self):
+ self.httpd = compat_http_server.HTTPServer(
+ ('127.0.0.1', 0), HTTPTestRequestHandler)
+ self.port = http_server_port(self.httpd)
+ self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+ self.server_thread.daemon = True
+ self.server_thread.start()
+
+ def test_unicode_path_redirection(self):
+ # XXX: Python 3 http server does not allow non-ASCII header values
+ if sys.version_info[0] == 3:
+ return
+
+ ydl = YoutubeDL({'logger': FakeLogger()})
+ r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port)
+ self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port)
+
+
+class TestHTTPS(unittest.TestCase):
+ def setUp(self):
+ certfn = os.path.join(TEST_DIR, 'testcert.pem')
+ self.httpd = compat_http_server.HTTPServer(
+ ('127.0.0.1', 0), HTTPTestRequestHandler)
+ self.httpd.socket = ssl.wrap_socket(
+ self.httpd.socket, certfile=certfn, server_side=True)
+ self.port = http_server_port(self.httpd)
+ self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+ self.server_thread.daemon = True
+ self.server_thread.start()
+
+ def test_nocheckcertificate(self):
+        if sys.version_info >= (2, 7, 9):  # Pythons before 2.7.9 do no certificate checking, so only newer versions can fail here
+ ydl = YoutubeDL({'logger': FakeLogger()})
+ self.assertRaises(
+ Exception,
+ ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
+
+ ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
+ r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
+ self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
+
+
+def _build_proxy_handler(name):
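+    # Minimal stand-in proxy: every GET is answered with '<name>: <path>',
+    # letting the tests assert exactly which proxy served a request.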
+ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ proxy_name = name
+
+ def log_message(self, format, *args):
+ pass
+
+ def do_GET(self):
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/plain; charset=utf-8')
+ self.end_headers()
+ self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
+ return HTTPTestRequestHandler
+
+
+class TestProxy(unittest.TestCase):
+ def setUp(self):
+ self.proxy = compat_http_server.HTTPServer(
+ ('127.0.0.1', 0), _build_proxy_handler('normal'))
+ self.port = http_server_port(self.proxy)
+ self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
+ self.proxy_thread.daemon = True
+ self.proxy_thread.start()
+
+ self.geo_proxy = compat_http_server.HTTPServer(
+ ('127.0.0.1', 0), _build_proxy_handler('geo'))
+ self.geo_port = http_server_port(self.geo_proxy)
+ self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
+ self.geo_proxy_thread.daemon = True
+ self.geo_proxy_thread.start()
+
+ def test_proxy(self):
+ geo_proxy = '127.0.0.1:{0}'.format(self.geo_port)
+ ydl = YoutubeDL({
+ 'proxy': '127.0.0.1:{0}'.format(self.port),
+ 'geo_verification_proxy': geo_proxy,
+ })
+ url = 'http://foo.com/bar'
+ response = ydl.urlopen(url).read().decode('utf-8')
+ self.assertEqual(response, 'normal: {0}'.format(url))
+
+ req = compat_urllib_request.Request(url)
+ req.add_header('Ytdl-request-proxy', geo_proxy)
+ response = ydl.urlopen(req).read().decode('utf-8')
+ self.assertEqual(response, 'geo: {0}'.format(url))
+
+ def test_proxy_with_idn(self):
+ ydl = YoutubeDL({
+ 'proxy': '127.0.0.1:{0}'.format(self.port),
+ })
+ url = 'http://中文.tw/'
+ response = ydl.urlopen(url).read().decode('utf-8')
+ # b'xn--fiq228c' is '中文'.encode('idna')
+ self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_netrc.py b/test/test_netrc.py
new file mode 100644
index 0000000..50b9e5b
--- /dev/null
+++ b/test/test_netrc.py
@@ -0,0 +1,28 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from hypervideo_dl.extractor import (
+ gen_extractors,
+)
+
+
+class TestNetRc(unittest.TestCase):
+ def test_netrc_present(self):
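+        # An extractor that implements _login is expected to declare
+        # _NETRC_MACHINE so credentials can be looked up in ~/.netrc.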
+ for ie in gen_extractors():
+ if not hasattr(ie, '_login'):
+ continue
+ self.assertTrue(
+ hasattr(ie, '_NETRC_MACHINE'),
+ 'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_options.py b/test/test_options.py
new file mode 100644
index 0000000..0b2458e
--- /dev/null
+++ b/test/test_options.py
@@ -0,0 +1,26 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from hypervideo_dl.options import _hide_login_info
+
+
+class TestOptions(unittest.TestCase):
+ def test_hide_login_info(self):
+ self.assertEqual(_hide_login_info(['-u', 'foo', '-p', 'bar']),
+ ['-u', 'PRIVATE', '-p', 'PRIVATE'])
+ self.assertEqual(_hide_login_info(['-u']), ['-u'])
+ self.assertEqual(_hide_login_info(['-u', 'foo', '-u', 'bar']),
+ ['-u', 'PRIVATE', '-u', 'PRIVATE'])
+ self.assertEqual(_hide_login_info(['--username=foo']),
+ ['--username=PRIVATE'])
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
new file mode 100644
index 0000000..bbea1c9
--- /dev/null
+++ b/test/test_postprocessors.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from hypervideo_dl.postprocessor import MetadataFromTitlePP
+
+
+class TestMetadataFromTitle(unittest.TestCase):
+ def test_format_to_regex(self):
+ pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
+ self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
diff --git a/test/test_socks.py b/test/test_socks.py
new file mode 100644
index 0000000..47ebf48
--- /dev/null
+++ b/test/test_socks.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+# coding: utf-8
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import random
+import subprocess
+
+from test.helper import (
+ FakeYDL,
+ get_params,
+)
+from hypervideo_dl.compat import (
+ compat_str,
+ compat_urllib_request,
+)
+
+
+class TestMultipleSocks(unittest.TestCase):
+ @staticmethod
+ def _check_params(attrs):
+ params = get_params()
+ for attr in attrs:
+ if attr not in params:
+ print('Missing %s. Skipping.' % attr)
+ return
+ return params
+
+ def test_proxy_http(self):
+ params = self._check_params(['primary_proxy', 'primary_server_ip'])
+ if params is None:
+ return
+ ydl = FakeYDL({
+ 'proxy': params['primary_proxy']
+ })
+ self.assertEqual(
+ ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8'),
+ params['primary_server_ip'])
+
+ def test_proxy_https(self):
+ params = self._check_params(['primary_proxy', 'primary_server_ip'])
+ if params is None:
+ return
+ ydl = FakeYDL({
+ 'proxy': params['primary_proxy']
+ })
+ self.assertEqual(
+ ydl.urlopen('https://yt-dl.org/ip').read().decode('utf-8'),
+ params['primary_server_ip'])
+
+ def test_secondary_proxy_http(self):
+ params = self._check_params(['secondary_proxy', 'secondary_server_ip'])
+ if params is None:
+ return
+ ydl = FakeYDL()
+ req = compat_urllib_request.Request('http://yt-dl.org/ip')
+ req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
+ self.assertEqual(
+ ydl.urlopen(req).read().decode('utf-8'),
+ params['secondary_server_ip'])
+
+ def test_secondary_proxy_https(self):
+ params = self._check_params(['secondary_proxy', 'secondary_server_ip'])
+ if params is None:
+ return
+ ydl = FakeYDL()
+ req = compat_urllib_request.Request('https://yt-dl.org/ip')
+ req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
+ self.assertEqual(
+ ydl.urlopen(req).read().decode('utf-8'),
+ params['secondary_server_ip'])
+
+
+class TestSocks(unittest.TestCase):
+ _SKIP_SOCKS_TEST = True
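+    # Skipped by default: these tests need a local `srelay` SOCKS relay
+    # binary on PATH, which setUp() spawns below.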
+
+ def setUp(self):
+ if self._SKIP_SOCKS_TEST:
+ return
+
+ self.port = random.randint(20000, 30000)
+ self.server_process = subprocess.Popen([
+ 'srelay', '-f', '-i', '127.0.0.1:%d' % self.port],
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+ def tearDown(self):
+ if self._SKIP_SOCKS_TEST:
+ return
+
+ self.server_process.terminate()
+ self.server_process.communicate()
+
+ def _get_ip(self, protocol):
+ if self._SKIP_SOCKS_TEST:
+ return '127.0.0.1'
+
+ ydl = FakeYDL({
+ 'proxy': '%s://127.0.0.1:%d' % (protocol, self.port),
+ })
+ return ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8')
+
+ def test_socks4(self):
+ self.assertTrue(isinstance(self._get_ip('socks4'), compat_str))
+
+ def test_socks4a(self):
+ self.assertTrue(isinstance(self._get_ip('socks4a'), compat_str))
+
+ def test_socks5(self):
+ self.assertTrue(isinstance(self._get_ip('socks5'), compat_str))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
new file mode 100644
index 0000000..195340d
--- /dev/null
+++ b/test/test_subtitles.py
@@ -0,0 +1,355 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import FakeYDL, md5
+
+
+from hypervideo_dl.extractor import (
+ YoutubeIE,
+ DailymotionIE,
+ TEDIE,
+ VimeoIE,
+ WallaIE,
+ CeskaTelevizeIE,
+ LyndaIE,
+ NPOIE,
+ ComedyCentralIE,
+ NRKTVIE,
+ RaiPlayIE,
+ VikiIE,
+ ThePlatformIE,
+ ThePlatformFeedIE,
+ RTVEALaCartaIE,
+ DemocracynowIE,
+)
+
+
+class BaseTestSubtitles(unittest.TestCase):
+ url = None
+ IE = None
+
+ def setUp(self):
+ self.DL = FakeYDL()
+ self.ie = self.IE()
+ self.DL.add_info_extractor(self.ie)
+
+ def getInfoDict(self):
+ info_dict = self.DL.extract_info(self.url, download=False)
+ return info_dict
+
+ def getSubtitles(self):
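+        # Some extractors return only subtitle URLs; fetch those bodies here
+        # and map each language code to its subtitle text.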
+ info_dict = self.getInfoDict()
+ subtitles = info_dict['requested_subtitles']
+ if not subtitles:
+ return subtitles
+ for sub_info in subtitles.values():
+ if sub_info.get('data') is None:
+ uf = self.DL.urlopen(sub_info['url'])
+ sub_info['data'] = uf.read().decode('utf-8')
+ return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
+
+
+class TestYoutubeSubtitles(BaseTestSubtitles):
+ url = 'QRS8MkLhQmM'
+ IE = YoutubeIE
+
+ def test_youtube_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(len(subtitles.keys()), 13)
+ self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
+ self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
+ for lang in ['fr', 'de']:
+ self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+
+ def test_youtube_subtitles_ttml_format(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitlesformat'] = 'ttml'
+ subtitles = self.getSubtitles()
+ self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')
+
+ def test_youtube_subtitles_vtt_format(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitlesformat'] = 'vtt'
+ subtitles = self.getSubtitles()
+ self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
+
+ def test_youtube_automatic_captions(self):
+ self.url = '8YoUxe5ncPo'
+ self.DL.params['writeautomaticsub'] = True
+ self.DL.params['subtitleslangs'] = ['it']
+ subtitles = self.getSubtitles()
+ self.assertTrue(subtitles['it'] is not None)
+
+ def test_youtube_translated_subtitles(self):
+ # This video has a subtitles track, which can be translated
+ self.url = 'Ky9eprVWzlI'
+ self.DL.params['writeautomaticsub'] = True
+ self.DL.params['subtitleslangs'] = ['it']
+ subtitles = self.getSubtitles()
+ self.assertTrue(subtitles['it'] is not None)
+
+ def test_youtube_nosubtitles(self):
+ self.DL.expect_warning('video doesn\'t have subtitles')
+ self.url = 'n5BB19UTcdA'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertFalse(subtitles)
+
+
+class TestDailymotionSubtitles(BaseTestSubtitles):
+ url = 'http://www.dailymotion.com/video/xczg00'
+ IE = DailymotionIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertTrue(len(subtitles.keys()) >= 6)
+ self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
+ self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
+ for lang in ['es', 'fr', 'de']:
+ self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+
+ def test_nosubtitles(self):
+ self.DL.expect_warning('video doesn\'t have subtitles')
+ self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertFalse(subtitles)
+
+
+class TestTedSubtitles(BaseTestSubtitles):
+ url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
+ IE = TEDIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertTrue(len(subtitles.keys()) >= 28)
+ self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
+ self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
+ for lang in ['es', 'fr', 'de']:
+ self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+
+
+class TestVimeoSubtitles(BaseTestSubtitles):
+ url = 'http://vimeo.com/76979871'
+ IE = VimeoIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
+ self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
+ self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
+
+ def test_nosubtitles(self):
+ self.DL.expect_warning('video doesn\'t have subtitles')
+ self.url = 'http://vimeo.com/56015672'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertFalse(subtitles)
+
+
+class TestWallaSubtitles(BaseTestSubtitles):
+ url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
+ IE = WallaIE
+
+ def test_allsubtitles(self):
+ self.DL.expect_warning('Automatic Captions not supported by this server')
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['heb']))
+ self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')
+
+ def test_nosubtitles(self):
+ self.DL.expect_warning('video doesn\'t have subtitles')
+ self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertFalse(subtitles)
+
+
+class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
+ url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
+ IE = CeskaTelevizeIE
+
+ def test_allsubtitles(self):
+ self.DL.expect_warning('Automatic Captions not supported by this server')
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['cs']))
+ self.assertTrue(len(subtitles['cs']) > 20000)
+
+ def test_nosubtitles(self):
+ self.DL.expect_warning('video doesn\'t have subtitles')
+ self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertFalse(subtitles)
+
+
+class TestLyndaSubtitles(BaseTestSubtitles):
+ url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
+ IE = LyndaIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
+
+
+class TestNPOSubtitles(BaseTestSubtitles):
+ url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
+ IE = NPOIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['nl']))
+ self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
+
+
+class TestMTVSubtitles(BaseTestSubtitles):
+ url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
+ IE = ComedyCentralIE
+
+ def getInfoDict(self):
+ return super(TestMTVSubtitles, self).getInfoDict()['entries'][0]
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961')
+
+
+class TestNRKSubtitles(BaseTestSubtitles):
+ url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
+ IE = NRKTVIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['no']))
+ self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
+
+
+class TestRaiPlaySubtitles(BaseTestSubtitles):
+ IE = RaiPlayIE
+
+ def test_subtitles_key(self):
+ self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['it']))
+ self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
+
+ def test_subtitles_array_key(self):
+ self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['it']))
+ self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')
+
+
+class TestVikiSubtitles(BaseTestSubtitles):
+ url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
+ IE = VikiIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
+
+
+class TestThePlatformSubtitles(BaseTestSubtitles):
+ # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
+ # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
+ url = 'theplatform:JFUjUE1_ehvq'
+ IE = ThePlatformIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
+
+
+class TestThePlatformFeedSubtitles(BaseTestSubtitles):
+ url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
+ IE = ThePlatformFeedIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
+
+
+class TestRtveSubtitles(BaseTestSubtitles):
+ url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
+ IE = RTVEALaCartaIE
+
+ def test_allsubtitles(self):
+ print('Skipping, only available from Spain')
+ return
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['es']))
+ self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
+
+
+class TestDemocracynowSubtitles(BaseTestSubtitles):
+ url = 'http://www.democracynow.org/shows/2015/7/3'
+ IE = DemocracynowIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
+
+ def test_subtitles_in_page(self):
+ self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py
new file mode 100644
index 0000000..6c1b7ec
--- /dev/null
+++ b/test/test_unicode_literals.py
@@ -0,0 +1,66 @@
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import io
+import re
+
+rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+IGNORED_FILES = [
+ 'setup.py', # http://bugs.python.org/issue13943
+ 'conf.py',
+ 'buildserver.py',
+]
+
+IGNORED_DIRS = [
+ '.git',
+ '.tox',
+]
+
+from test.helper import assertRegexpMatches
+
+
+class TestUnicodeLiterals(unittest.TestCase):
+ def test_all_files(self):
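+        # Walk the source tree and require every .py file that contains
+        # string literals to import unicode_literals from __future__.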
+ for dirpath, dirnames, filenames in os.walk(rootDir):
+ for ignore_dir in IGNORED_DIRS:
+ if ignore_dir in dirnames:
+ # If we remove the directory from dirnames os.walk won't
+ # recurse into it
+ dirnames.remove(ignore_dir)
+ for basename in filenames:
+ if not basename.endswith('.py'):
+ continue
+ if basename in IGNORED_FILES:
+ continue
+
+ fn = os.path.join(dirpath, basename)
+ with io.open(fn, encoding='utf-8') as inf:
+ code = inf.read()
+
+ if "'" not in code and '"' not in code:
+ continue
+ assertRegexpMatches(
+ self,
+ code,
+ r'(?:(?:#.*?|\s*)\n)*from __future__ import (?:[a-z_]+,\s*)*unicode_literals',
+ 'unicode_literals import missing in %s' % fn)
+
+                m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code)
+                if m is not None:
+                    # assertTrue(m is None) would obscure the context, so
+                    # fail with a snippet around the offending u'' literal.
+                    self.fail(
+                        'u present in %s, around %s' % (
+                            fn, code[m.start() - 10:m.end() + 10]))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_utils.py b/test/test_utils.py
new file mode 100644
index 0000000..d8756a0
--- /dev/null
+++ b/test/test_utils.py
@@ -0,0 +1,1480 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+# Various small unit tests
+import io
+import json
+import xml.etree.ElementTree
+
+from hypervideo_dl.utils import (
+ age_restricted,
+ args_to_str,
+ encode_base_n,
+ caesar,
+ clean_html,
+ clean_podcast_url,
+ date_from_str,
+ DateRange,
+ detect_exe_version,
+ determine_ext,
+ dict_get,
+ encode_compat_str,
+ encodeFilename,
+ escape_rfc3986,
+ escape_url,
+ extract_attributes,
+ ExtractorError,
+ find_xpath_attr,
+ fix_xml_ampersands,
+ float_or_none,
+ get_element_by_class,
+ get_element_by_attribute,
+ get_elements_by_class,
+ get_elements_by_attribute,
+ InAdvancePagedList,
+ int_or_none,
+ intlist_to_bytes,
+ is_html,
+ js_to_json,
+ limit_length,
+ merge_dicts,
+ mimetype2ext,
+ month_by_name,
+ multipart_encode,
+ ohdave_rsa_encrypt,
+ OnDemandPagedList,
+ orderedSet,
+ parse_age_limit,
+ parse_duration,
+ parse_filesize,
+ parse_count,
+ parse_iso8601,
+ parse_resolution,
+ parse_bitrate,
+ pkcs1pad,
+ read_batch_urls,
+ sanitize_filename,
+ sanitize_path,
+ sanitize_url,
+ expand_path,
+ prepend_extension,
+ replace_extension,
+ remove_start,
+ remove_end,
+ remove_quotes,
+ rot47,
+ shell_quote,
+ smuggle_url,
+ str_to_int,
+ strip_jsonp,
+ strip_or_none,
+ subtitles_filename,
+ timeconvert,
+ unescapeHTML,
+ unified_strdate,
+ unified_timestamp,
+ unsmuggle_url,
+ uppercase_escape,
+ lowercase_escape,
+ url_basename,
+ url_or_none,
+ base_url,
+ urljoin,
+ urlencode_postdata,
+ urshift,
+ update_url_query,
+ version_tuple,
+ xpath_with_ns,
+ xpath_element,
+ xpath_text,
+ xpath_attr,
+ render_table,
+ match_str,
+ parse_dfxp_time_expr,
+ dfxp2srt,
+ cli_option,
+ cli_valueless_option,
+ cli_bool_option,
+ parse_codecs,
+)
+from hypervideo_dl.compat import (
+ compat_chr,
+ compat_etree_fromstring,
+ compat_getenv,
+ compat_os_name,
+ compat_setenv,
+ compat_urlparse,
+ compat_parse_qs,
+)
+
+
+class TestUtil(unittest.TestCase):
+ def test_timeconvert(self):
+ self.assertTrue(timeconvert('') is None)
+ self.assertTrue(timeconvert('bougrg') is None)
+
+ def test_sanitize_filename(self):
+ self.assertEqual(sanitize_filename('abc'), 'abc')
+ self.assertEqual(sanitize_filename('abc_d-e'), 'abc_d-e')
+
+ self.assertEqual(sanitize_filename('123'), '123')
+
+ self.assertEqual('abc_de', sanitize_filename('abc/de'))
+ self.assertFalse('/' in sanitize_filename('abc/de///'))
+
+ self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de'))
+ self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|'))
+ self.assertEqual('yes no', sanitize_filename('yes? no'))
+ self.assertEqual('this - that', sanitize_filename('this: that'))
+
+ self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
+ aumlaut = 'ä'
+ self.assertEqual(sanitize_filename(aumlaut), aumlaut)
+ tests = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430'
+ self.assertEqual(sanitize_filename(tests), tests)
+
+ self.assertEqual(
+ sanitize_filename('New World record at 0:12:34'),
+ 'New World record at 0_12_34')
+
+ self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
+ self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
+ self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf')
+ self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
+
+ forbidden = '"\0\\/'
+ for fc in forbidden:
+ for fbc in forbidden:
+ self.assertTrue(fbc not in sanitize_filename(fc))
+
+ def test_sanitize_filename_restricted(self):
+ self.assertEqual(sanitize_filename('abc', restricted=True), 'abc')
+ self.assertEqual(sanitize_filename('abc_d-e', restricted=True), 'abc_d-e')
+
+ self.assertEqual(sanitize_filename('123', restricted=True), '123')
+
+ self.assertEqual('abc_de', sanitize_filename('abc/de', restricted=True))
+ self.assertFalse('/' in sanitize_filename('abc/de///', restricted=True))
+
+ self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', restricted=True))
+ self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', restricted=True))
+ self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
+ self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
+
+ tests = 'aäb\u4e2d\u56fd\u7684c'
+ self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c')
+ self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename
+
+ forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
+ for fc in forbidden:
+ for fbc in forbidden:
+ self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
+
+ # Handle a common case more neatly
+ self.assertEqual(sanitize_filename('\u5927\u58f0\u5e26 - Song', restricted=True), 'Song')
+ self.assertEqual(sanitize_filename('\u603b\u7edf: Speech', restricted=True), 'Speech')
+ # .. but make sure the file name is never empty
+ self.assertTrue(sanitize_filename('-', restricted=True) != '')
+ self.assertTrue(sanitize_filename(':', restricted=True) != '')
+
+ self.assertEqual(sanitize_filename(
+ 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
+ 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy')
+
+ def test_sanitize_ids(self):
+ self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
+ self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
+ self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
+
+ def test_sanitize_path(self):
+ if sys.platform != 'win32':
+ return
+
+ self.assertEqual(sanitize_path('abc'), 'abc')
+ self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
+ self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
+ self.assertEqual(sanitize_path('abc|def'), 'abc#def')
+ self.assertEqual(sanitize_path('<>:"|?*'), '#######')
+ self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def')
+ self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def')
+
+ self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc')
+ self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc')
+
+ self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+ self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc')
+ self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f')
+ self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+
+ self.assertEqual(
+ sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'),
+ 'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s')
+
+ self.assertEqual(
+ sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'),
+ 'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part')
+ self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#')
+ self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def')
+ self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
+
+ self.assertEqual(sanitize_path('../abc'), '..\\abc')
+ self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc')
+ self.assertEqual(sanitize_path('./abc'), 'abc')
+ self.assertEqual(sanitize_path('./../abc'), '..\\abc')
+
+ def test_sanitize_url(self):
+ self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
+ self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
+ self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
+ self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
+
+ def test_expand_path(self):
+ def env(var):
+ return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
+
+ compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded')
+ self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded')
+ self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
+ self.assertEqual(expand_path('~'), compat_getenv('HOME'))
+ self.assertEqual(
+ expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
+ '%s/expanded' % compat_getenv('HOME'))
+
+ def test_prepend_extension(self):
+ self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
+ self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
+ self.assertEqual(prepend_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp')
+ self.assertEqual(prepend_extension('abc', 'temp'), 'abc.temp')
+ self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
+ self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
+
+ def test_replace_extension(self):
+ self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
+ self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
+ self.assertEqual(replace_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp')
+ self.assertEqual(replace_extension('abc', 'temp'), 'abc.temp')
+ self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
+ self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
+
+ def test_subtitles_filename(self):
+ self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
+ self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
+ self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt')
+
+ def test_remove_start(self):
+ self.assertEqual(remove_start(None, 'A - '), None)
+ self.assertEqual(remove_start('A - B', 'A - '), 'B')
+ self.assertEqual(remove_start('B - A', 'A - '), 'B - A')
+
+ def test_remove_end(self):
+ self.assertEqual(remove_end(None, ' - B'), None)
+ self.assertEqual(remove_end('A - B', ' - B'), 'A')
+ self.assertEqual(remove_end('B - A', ' - B'), 'B - A')
+
+ def test_remove_quotes(self):
+ self.assertEqual(remove_quotes(None), None)
+ self.assertEqual(remove_quotes('"'), '"')
+ self.assertEqual(remove_quotes("'"), "'")
+ self.assertEqual(remove_quotes(';'), ';')
+ self.assertEqual(remove_quotes('";'), '";')
+ self.assertEqual(remove_quotes('""'), '')
+ self.assertEqual(remove_quotes('";"'), ';')
+
+ def test_ordered_set(self):
+ self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
+ self.assertEqual(orderedSet([]), [])
+ self.assertEqual(orderedSet([1]), [1])
+ # keep the list ordered
+ self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1])
+
+ def test_unescape_html(self):
+ self.assertEqual(unescapeHTML('%20;'), '%20;')
+ self.assertEqual(unescapeHTML('&#x2F;'), '/')
+ self.assertEqual(unescapeHTML('&#47;'), '/')
+ self.assertEqual(unescapeHTML('&eacute;'), 'é')
+ self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
+ self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
+ # HTML5 entities
+ self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
+
+ def test_date_from_str(self):
+ self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
+ self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week'))
+ self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week'))
+ self.assertEqual(date_from_str('now+365day'), date_from_str('now+1year'))
+ self.assertEqual(date_from_str('now+30day'), date_from_str('now+1month'))
+
+ def test_daterange(self):
+ _20century = DateRange("19000101", "20000101")
+ self.assertFalse("17890714" in _20century)
+ _ac = DateRange("00010101")
+ self.assertTrue("19690721" in _ac)
+        _firstmillennium = DateRange(end="10000101")
+        self.assertTrue("07110427" in _firstmillennium)
+
+ def test_unified_dates(self):
+ self.assertEqual(unified_strdate('December 21, 2010'), '20101221')
+ self.assertEqual(unified_strdate('8/7/2009'), '20090708')
+ self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
+ self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
+ self.assertEqual(unified_strdate('1968 12 10'), '19681210')
+ self.assertEqual(unified_strdate('1968-12-10'), '19681210')
+ self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
+ self.assertEqual(
+ unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
+ '20141126')
+ self.assertEqual(
+ unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
+ '20150202')
+ self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
+ self.assertEqual(unified_strdate('25-09-2014'), '20140925')
+ self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
+ self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
+ self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
+ self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
+ self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
+ self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
+ self.assertEqual(unified_strdate('November 3rd, 2019'), '20191103')
+ self.assertEqual(unified_strdate('October 23rd, 2005'), '20051023')
+
+ def test_unified_timestamps(self):
+ self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
+ self.assertEqual(unified_timestamp('8/7/2009'), 1247011200)
+ self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200)
+ self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598)
+ self.assertEqual(unified_timestamp('1968 12 10'), -33436800)
+ self.assertEqual(unified_timestamp('1968-12-10'), -33436800)
+ self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200)
+ self.assertEqual(
+ unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False),
+ 1417001400)
+ self.assertEqual(
+ unified_timestamp('2/2/2015 6:47:40 PM', day_first=False),
+ 1422902860)
+ self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900)
+ self.assertEqual(unified_timestamp('25-09-2014'), 1411603200)
+ self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
+ self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
+ self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
+ self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
+ self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
+ self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
+ self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
+ self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
+
+ def test_determine_ext(self):
+ self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
+ self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
+ self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
+ self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
+ self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
+ self.assertEqual(determine_ext('foobar', None), None)
+
+ def test_find_xpath_attr(self):
+ testxml = '''<root>
+ <node/>
+ <node x="a"/>
+ <node x="a" y="c" />
+ <node x="b" y="d" />
+ <node x="" />
+ </root>'''
+ doc = compat_etree_fromstring(testxml)
+
+ self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None)
+ self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None)
+ self.assertEqual(find_xpath_attr(doc, './/node', 'n'), None)
+ self.assertEqual(find_xpath_attr(doc, './/node', 'n', 'v'), None)
+ self.assertEqual(find_xpath_attr(doc, './/node', 'x'), doc[1])
+ self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
+ self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'b'), doc[3])
+ self.assertEqual(find_xpath_attr(doc, './/node', 'y'), doc[2])
+ self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
+ self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'd'), doc[3])
+ self.assertEqual(find_xpath_attr(doc, './/node', 'x', ''), doc[4])
+
+ def test_xpath_with_ns(self):
+ testxml = '''<root xmlns:media="http://example.com/">
+ <media:song>
+ <media:author>The Author</media:author>
+ <url>http://server.com/download.mp3</url>
+ </media:song>
+ </root>'''
+ doc = compat_etree_fromstring(testxml)
+ find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))
+ self.assertTrue(find('media:song') is not None)
+ self.assertEqual(find('media:song/media:author').text, 'The Author')
+ self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
+
+ def test_xpath_element(self):
+ doc = xml.etree.ElementTree.Element('root')
+ div = xml.etree.ElementTree.SubElement(doc, 'div')
+ p = xml.etree.ElementTree.SubElement(div, 'p')
+ p.text = 'Foo'
+ self.assertEqual(xpath_element(doc, 'div/p'), p)
+ self.assertEqual(xpath_element(doc, ['div/p']), p)
+ self.assertEqual(xpath_element(doc, ['div/bar', 'div/p']), p)
+ self.assertEqual(xpath_element(doc, 'div/bar', default='default'), 'default')
+ self.assertEqual(xpath_element(doc, ['div/bar'], default='default'), 'default')
+ self.assertTrue(xpath_element(doc, 'div/bar') is None)
+ self.assertTrue(xpath_element(doc, ['div/bar']) is None)
+ self.assertTrue(xpath_element(doc, ['div/bar'], 'div/baz') is None)
+ self.assertRaises(ExtractorError, xpath_element, doc, 'div/bar', fatal=True)
+ self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar'], fatal=True)
+ self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar', 'div/baz'], fatal=True)
+
+ def test_xpath_text(self):
+ testxml = '''<root>
+ <div>
+ <p>Foo</p>
+ </div>
+ </root>'''
+ doc = compat_etree_fromstring(testxml)
+ self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
+ self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default')
+ self.assertTrue(xpath_text(doc, 'div/bar') is None)
+ self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
+
+ def test_xpath_attr(self):
+ testxml = '''<root>
+ <div>
+ <p x="a">Foo</p>
+ </div>
+ </root>'''
+ doc = compat_etree_fromstring(testxml)
+ self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a')
+ self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None)
+ self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None)
+ self.assertEqual(xpath_attr(doc, 'div/bar', 'x', default='default'), 'default')
+ self.assertEqual(xpath_attr(doc, 'div/p', 'y', default='default'), 'default')
+ self.assertRaises(ExtractorError, xpath_attr, doc, 'div/bar', 'x', fatal=True)
+ self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True)
+
+ def test_smuggle_url(self):
+ data = {"ö": "ö", "abc": [3]}
+ url = 'https://foo.bar/baz?x=y#a'
+ smug_url = smuggle_url(url, data)
+ unsmug_url, unsmug_data = unsmuggle_url(smug_url)
+ self.assertEqual(url, unsmug_url)
+ self.assertEqual(data, unsmug_data)
+
+ res_url, res_data = unsmuggle_url(url)
+ self.assertEqual(res_url, url)
+ self.assertEqual(res_data, None)
+
+ smug_url = smuggle_url(url, {'a': 'b'})
+ smug_smug_url = smuggle_url(smug_url, {'c': 'd'})
+ res_url, res_data = unsmuggle_url(smug_smug_url)
+ self.assertEqual(res_url, url)
+ self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
+
+ def test_shell_quote(self):
+ args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
+ self.assertEqual(
+ shell_quote(args),
+ """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
+
+ def test_float_or_none(self):
+ self.assertEqual(float_or_none('42.42'), 42.42)
+ self.assertEqual(float_or_none('42'), 42.0)
+ self.assertEqual(float_or_none(''), None)
+ self.assertEqual(float_or_none(None), None)
+ self.assertEqual(float_or_none([]), None)
+ self.assertEqual(float_or_none(set()), None)
+
+ def test_int_or_none(self):
+ self.assertEqual(int_or_none('42'), 42)
+ self.assertEqual(int_or_none(''), None)
+ self.assertEqual(int_or_none(None), None)
+ self.assertEqual(int_or_none([]), None)
+ self.assertEqual(int_or_none(set()), None)
+
+ def test_str_to_int(self):
+ self.assertEqual(str_to_int('123,456'), 123456)
+ self.assertEqual(str_to_int('123.456'), 123456)
+ self.assertEqual(str_to_int(523), 523)
+ # Python 3 has no long
+ if sys.version_info < (3, 0):
+ eval('self.assertEqual(str_to_int(123456L), 123456)')
+ self.assertEqual(str_to_int('noninteger'), None)
+ self.assertEqual(str_to_int([]), None)
+
+ def test_url_basename(self):
+ self.assertEqual(url_basename('http://foo.de/'), '')
+ self.assertEqual(url_basename('http://foo.de/bar/baz'), 'baz')
+ self.assertEqual(url_basename('http://foo.de/bar/baz?x=y'), 'baz')
+ self.assertEqual(url_basename('http://foo.de/bar/baz#x=y'), 'baz')
+ self.assertEqual(url_basename('http://foo.de/bar/baz/'), 'baz')
+ self.assertEqual(
+ url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
+ 'trailer.mp4')
+
+ def test_base_url(self):
+ self.assertEqual(base_url('http://foo.de/'), 'http://foo.de/')
+ self.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/')
+ self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/')
+ self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
+ self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
+
+ def test_urljoin(self):
+ self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de/', '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt')
+ self.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin(None, '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+ self.assertEqual(urljoin('http://foo.de/', None), None)
+ self.assertEqual(urljoin('http://foo.de/', ''), None)
+ self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
+ self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
+ self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de')
+ self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de')
+
+ def test_url_or_none(self):
+ self.assertEqual(url_or_none(None), None)
+ self.assertEqual(url_or_none(''), None)
+ self.assertEqual(url_or_none('foo'), None)
+ self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
+ self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
+ self.assertEqual(url_or_none('http$://foo.de'), None)
+ self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
+ self.assertEqual(url_or_none('//foo.de'), '//foo.de')
+ self.assertEqual(url_or_none('s3://foo.de'), None)
+ self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de')
+ self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
+ self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
+ self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
+
+ def test_parse_age_limit(self):
+ self.assertEqual(parse_age_limit(None), None)
+ self.assertEqual(parse_age_limit(False), None)
+ self.assertEqual(parse_age_limit('invalid'), None)
+ self.assertEqual(parse_age_limit(0), 0)
+ self.assertEqual(parse_age_limit(18), 18)
+ self.assertEqual(parse_age_limit(21), 21)
+ self.assertEqual(parse_age_limit(22), None)
+ self.assertEqual(parse_age_limit('18'), 18)
+ self.assertEqual(parse_age_limit('18+'), 18)
+ self.assertEqual(parse_age_limit('PG-13'), 13)
+ self.assertEqual(parse_age_limit('TV-14'), 14)
+ self.assertEqual(parse_age_limit('TV-MA'), 17)
+ self.assertEqual(parse_age_limit('TV14'), 14)
+ self.assertEqual(parse_age_limit('TV_G'), 0)
+
+ def test_parse_duration(self):
+ self.assertEqual(parse_duration(None), None)
+ self.assertEqual(parse_duration(False), None)
+ self.assertEqual(parse_duration('invalid'), None)
+ self.assertEqual(parse_duration('1'), 1)
+ self.assertEqual(parse_duration('1337:12'), 80232)
+ self.assertEqual(parse_duration('9:12:43'), 33163)
+ self.assertEqual(parse_duration('12:00'), 720)
+ self.assertEqual(parse_duration('00:01:01'), 61)
+ self.assertEqual(parse_duration('x:y'), None)
+ self.assertEqual(parse_duration('3h11m53s'), 11513)
+ self.assertEqual(parse_duration('3h 11m 53s'), 11513)
+ self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513)
+ self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513)
+ self.assertEqual(parse_duration('62m45s'), 3765)
+ self.assertEqual(parse_duration('6m59s'), 419)
+ self.assertEqual(parse_duration('49s'), 49)
+ self.assertEqual(parse_duration('0h0m0s'), 0)
+ self.assertEqual(parse_duration('0m0s'), 0)
+ self.assertEqual(parse_duration('0s'), 0)
+ self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
+ self.assertEqual(parse_duration('T30M38S'), 1838)
+ self.assertEqual(parse_duration('5 s'), 5)
+ self.assertEqual(parse_duration('3 min'), 180)
+ self.assertEqual(parse_duration('2.5 hours'), 9000)
+ self.assertEqual(parse_duration('02:03:04'), 7384)
+ self.assertEqual(parse_duration('01:02:03:04'), 93784)
+ self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
+ self.assertEqual(parse_duration('87 Min.'), 5220)
+ self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
+ self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
+ self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
+
+ def test_fix_xml_ampersands(self):
+ self.assertEqual(
+ fix_xml_ampersands('"&x=y&z=a'), '"&amp;x=y&amp;z=a')
+ self.assertEqual(
+ fix_xml_ampersands('"&amp;x=y&wrong;&z=a'),
+ '"&amp;x=y&amp;wrong;&amp;z=a')
+ self.assertEqual(
+ fix_xml_ampersands('&amp;&apos;&gt;&lt;&quot;'),
+ '&amp;&apos;&gt;&lt;&quot;')
+ self.assertEqual(
+ fix_xml_ampersands('&#1234;&#x1abC;'), '&#1234;&#x1abC;')
+ self.assertEqual(fix_xml_ampersands('&#&#'), '&amp;#&amp;#')
+
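+    # OnDemandPagedList fetches pages lazily as slices are requested, while
+    # InAdvancePagedList is told the page count up front; both must return
+    # the same results for the same getslice() arguments.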
+ def test_paged_list(self):
+ def testPL(size, pagesize, sliceargs, expected):
+ def get_page(pagenum):
+ firstid = pagenum * pagesize
+ upto = min(size, pagenum * pagesize + pagesize)
+ for i in range(firstid, upto):
+ yield i
+
+ pl = OnDemandPagedList(get_page, pagesize)
+ got = pl.getslice(*sliceargs)
+ self.assertEqual(got, expected)
+
+ iapl = InAdvancePagedList(get_page, size // pagesize + 1, pagesize)
+ got = iapl.getslice(*sliceargs)
+ self.assertEqual(got, expected)
+
+ testPL(5, 2, (), [0, 1, 2, 3, 4])
+ testPL(5, 2, (1,), [1, 2, 3, 4])
+ testPL(5, 2, (2,), [2, 3, 4])
+ testPL(5, 2, (4,), [4])
+ testPL(5, 2, (0, 3), [0, 1, 2])
+ testPL(5, 2, (1, 4), [1, 2, 3])
+ testPL(5, 2, (2, 99), [2, 3, 4])
+ testPL(5, 2, (20, 99), [])
+
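+    # Batch files may carry a UTF-8 BOM, stray whitespace, and comment lines
+    # starting with '#' or ';', all of which read_batch_urls must skip.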
+ def test_read_batch_urls(self):
+ f = io.StringIO('''\xef\xbb\xbf foo
+ bar\r
+ baz
+ # More after this line\r
+ ; or after this
+ bam''')
+ self.assertEqual(read_batch_urls(f), ['foo', 'bar', 'baz', 'bam'])
+
+ def test_urlencode_postdata(self):
+ data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
+ self.assertTrue(isinstance(data, bytes))
+
+ def test_update_url_query(self):
+ def query_dict(url):
+ return compat_parse_qs(compat_urlparse.urlparse(url).query)
+ self.assertEqual(query_dict(update_url_query(
+ 'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
+ query_dict('http://example.com/path?quality=HD&format=mp4'))
+ self.assertEqual(query_dict(update_url_query(
+ 'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
+ query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
+ self.assertEqual(query_dict(update_url_query(
+ 'http://example.com/path', {'fields': 'id,formats,subtitles'})),
+ query_dict('http://example.com/path?fields=id,formats,subtitles'))
+ self.assertEqual(query_dict(update_url_query(
+ 'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
+ query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
+ self.assertEqual(query_dict(update_url_query(
+ 'http://example.com/path?manifest=f4m', {'manifest': []})),
+ query_dict('http://example.com/path'))
+ self.assertEqual(query_dict(update_url_query(
+ 'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
+ query_dict('http://example.com/path?system=LINUX'))
+ self.assertEqual(query_dict(update_url_query(
+ 'http://example.com/path', {'fields': b'id,formats,subtitles'})),
+ query_dict('http://example.com/path?fields=id,formats,subtitles'))
+ self.assertEqual(query_dict(update_url_query(
+ 'http://example.com/path', {'width': 1080, 'height': 720})),
+ query_dict('http://example.com/path?width=1080&height=720'))
+ self.assertEqual(query_dict(update_url_query(
+ 'http://example.com/path', {'bitrate': 5020.43})),
+ query_dict('http://example.com/path?bitrate=5020.43'))
+ self.assertEqual(query_dict(update_url_query(
+ 'http://example.com/path', {'test': '第二行тест'})),
+ query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
+
+ def test_multipart_encode(self):
+ self.assertEqual(
+ multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0],
+ b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')
+ self.assertEqual(
+ multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0],
+ b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')
+ self.assertRaises(
+ ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
+
+ def test_dict_get(self):
+ FALSE_VALUES = {
+ 'none': None,
+ 'false': False,
+ 'zero': 0,
+ 'empty_string': '',
+ 'empty_list': [],
+ }
+ d = FALSE_VALUES.copy()
+ d['a'] = 42
+ self.assertEqual(dict_get(d, 'a'), 42)
+ self.assertEqual(dict_get(d, 'b'), None)
+ self.assertEqual(dict_get(d, 'b', 42), 42)
+ self.assertEqual(dict_get(d, ('a', )), 42)
+ self.assertEqual(dict_get(d, ('b', 'a', )), 42)
+ self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
+ self.assertEqual(dict_get(d, ('b', 'c', )), None)
+ self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
+ for key, false_value in FALSE_VALUES.items():
+ self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
+ self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
+
+ def test_merge_dicts(self):
+ self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
+ self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': 1}, {'a': None}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': 1}, {'a': ''}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': 1}, {}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': None}, {'a': 1}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': ''}, {'a': 1}), {'a': ''})
+ self.assertEqual(merge_dicts({'a': ''}, {'a': 'abc'}), {'a': 'abc'})
+ self.assertEqual(merge_dicts({'a': None}, {'a': ''}, {'a': 'abc'}), {'a': 'abc'})
+
+ def test_encode_compat_str(self):
+ self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест')
+ self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест')
+
+ def test_parse_iso8601(self):
+ self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266)
+ self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
+ self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
+ self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266)
+ self.assertEqual(parse_iso8601('2015-09-29T08:27:31.727'), 1443515251)
+ self.assertEqual(parse_iso8601('2015-09-29T08-27-31.727'), None)
+
+ def test_strip_jsonp(self):
+ stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);')
+ d = json.loads(stripped)
+ self.assertEqual(d, [{"id": "532cb", "x": 3}])
+
+ stripped = strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc')
+ d = json.loads(stripped)
+ self.assertEqual(d, {'STATUS': 'OK'})
+
+ stripped = strip_jsonp('ps.embedHandler({"status": "success"});')
+ d = json.loads(stripped)
+ self.assertEqual(d, {'status': 'success'})
+
+ stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
+ d = json.loads(stripped)
+ self.assertEqual(d, {'status': 'success'})
+
+ stripped = strip_jsonp('window.cb && cb({"status": "success"});')
+ d = json.loads(stripped)
+ self.assertEqual(d, {'status': 'success'})
+
+ stripped = strip_jsonp('({"status": "success"});')
+ d = json.loads(stripped)
+ self.assertEqual(d, {'status': 'success'})
+
+ def test_strip_or_none(self):
+ self.assertEqual(strip_or_none(' abc'), 'abc')
+ self.assertEqual(strip_or_none('abc '), 'abc')
+ self.assertEqual(strip_or_none(' abc '), 'abc')
+ self.assertEqual(strip_or_none('\tabc\t'), 'abc')
+ self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc')
+ self.assertEqual(strip_or_none('abc'), 'abc')
+ self.assertEqual(strip_or_none(''), '')
+ self.assertEqual(strip_or_none(None), None)
+ self.assertEqual(strip_or_none(42), None)
+ self.assertEqual(strip_or_none([]), None)
+
+ def test_uppercase_escape(self):
+ self.assertEqual(uppercase_escape('aä'), 'aä')
+ self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
+
+ def test_lowercase_escape(self):
+ self.assertEqual(lowercase_escape('aä'), 'aä')
+ self.assertEqual(lowercase_escape('\\u0026'), '&')
+
+ def test_limit_length(self):
+ self.assertEqual(limit_length(None, 12), None)
+ self.assertEqual(limit_length('foo', 12), 'foo')
+ self.assertTrue(
+ limit_length('foo bar baz asd', 12).startswith('foo bar'))
+ self.assertTrue('...' in limit_length('foo bar baz asd', 12))
+
+ def test_mimetype2ext(self):
+ self.assertEqual(mimetype2ext(None), None)
+ self.assertEqual(mimetype2ext('video/x-flv'), 'flv')
+ self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8')
+ self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
+ self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
+ self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
+ self.assertEqual(mimetype2ext('audio/x-wav'), 'wav')
+ self.assertEqual(mimetype2ext('audio/x-wav;codec=pcm'), 'wav')
+
+ def test_month_by_name(self):
+ self.assertEqual(month_by_name(None), None)
+ self.assertEqual(month_by_name('December', 'en'), 12)
+ self.assertEqual(month_by_name('décembre', 'fr'), 12)
+ self.assertEqual(month_by_name('December'), 12)
+ self.assertEqual(month_by_name('décembre'), None)
+ self.assertEqual(month_by_name('Unknown', 'unknown'), None)
+
+ def test_parse_codecs(self):
+ self.assertEqual(parse_codecs(''), {})
+ self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
+ 'vcodec': 'avc1.77.30',
+ 'acodec': 'mp4a.40.2',
+ })
+ self.assertEqual(parse_codecs('mp4a.40.2'), {
+ 'vcodec': 'none',
+ 'acodec': 'mp4a.40.2',
+ })
+ self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), {
+ 'vcodec': 'avc1.42001e',
+ 'acodec': 'mp4a.40.5',
+ })
+ self.assertEqual(parse_codecs('avc3.640028'), {
+ 'vcodec': 'avc3.640028',
+ 'acodec': 'none',
+ })
+ self.assertEqual(parse_codecs(', h264,,newcodec,aac'), {
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ })
+ self.assertEqual(parse_codecs('av01.0.05M.08'), {
+ 'vcodec': 'av01.0.05M.08',
+ 'acodec': 'none',
+ })
+ self.assertEqual(parse_codecs('theora, vorbis'), {
+ 'vcodec': 'theora',
+ 'acodec': 'vorbis',
+ })
+ self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), {
+ 'vcodec': 'unknownvcodec',
+ 'acodec': 'unknownacodec',
+ })
+ self.assertEqual(parse_codecs('unknown'), {})
+
+ def test_escape_rfc3986(self):
+ reserved = "!*'();:@&=+$,/?#[]"
+ unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
+ self.assertEqual(escape_rfc3986(reserved), reserved)
+ self.assertEqual(escape_rfc3986(unreserved), unreserved)
+ self.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82')
+ self.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82')
+ self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar')
+ self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar')
+
+ def test_escape_url(self):
+ self.assertEqual(
+ escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
+ 'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4'
+ )
+ self.assertEqual(
+ escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
+ 'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290'
+ )
+ self.assertEqual(
+ escape_url('http://тест.рф/фрагмент'),
+ 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
+ )
+ self.assertEqual(
+ escape_url('http://тест.рф/абв?абв=абв#абв'),
+ 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
+ )
+ self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
+
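+    # js_to_json converts JavaScript object literals (single quotes, unquoted
+    # or numeric keys, comments, trailing commas) into strict, parseable JSON.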
+ def test_js_to_json_realworld(self):
+ inp = '''{
+ 'clip':{'provider':'pseudo'}
+ }'''
+ self.assertEqual(js_to_json(inp), '''{
+ "clip":{"provider":"pseudo"}
+ }''')
+ json.loads(js_to_json(inp))
+
+ inp = '''{
+ 'playlist':[{'controls':{'all':null}}]
+ }'''
+ self.assertEqual(js_to_json(inp), '''{
+ "playlist":[{"controls":{"all":null}}]
+ }''')
+
+ inp = '''"The CW\\'s \\'Crazy Ex-Girlfriend\\'"'''
+ self.assertEqual(js_to_json(inp), '''"The CW's 'Crazy Ex-Girlfriend'"''')
+
+ inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
+ json_code = js_to_json(inp)
+ self.assertEqual(json.loads(json_code), json.loads(inp))
+
+ inp = '''{
+ 0:{src:'skipped', type: 'application/dash+xml'},
+ 1:{src:'skipped', type: 'application/vnd.apple.mpegURL'},
+ }'''
+ self.assertEqual(js_to_json(inp), '''{
+ "0":{"src":"skipped", "type": "application/dash+xml"},
+ "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"}
+ }''')
+
+ inp = '''{"foo":101}'''
+ self.assertEqual(js_to_json(inp), '''{"foo":101}''')
+
+ inp = '''{"duration": "00:01:07"}'''
+ self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''')
+
+ inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}'''
+ self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''')
+
+ def test_js_to_json_edgecases(self):
+ on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
+ self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
+
+ on = js_to_json('{"abc": true}')
+ self.assertEqual(json.loads(on), {'abc': True})
+
+ # Ignore JavaScript code as well
+ on = js_to_json('''{
+ "x": 1,
+ y: "a",
+ z: some.code
+ }''')
+ d = json.loads(on)
+ self.assertEqual(d['x'], 1)
+ self.assertEqual(d['y'], 'a')
+
+        # Just drop the ! prefix for now, though this results in a wrong value
+ on = js_to_json('''{
+ a: !0,
+ b: !1,
+ c: !!0,
+ d: !!42.42,
+ e: !!![],
+ f: !"abc",
+ g: !"",
+ !42: 42
+ }''')
+ self.assertEqual(json.loads(on), {
+ 'a': 0,
+ 'b': 1,
+ 'c': 0,
+ 'd': 42.42,
+ 'e': [],
+ 'f': "abc",
+ 'g': "",
+ '42': 42
+ })
+
+ on = js_to_json('["abc", "def",]')
+ self.assertEqual(json.loads(on), ['abc', 'def'])
+
+ on = js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]')
+ self.assertEqual(json.loads(on), ['abc', 'def'])
+
+ on = js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]')
+ self.assertEqual(json.loads(on), ['abc', 'def'])
+
+ on = js_to_json('{"abc": "def",}')
+ self.assertEqual(json.loads(on), {'abc': 'def'})
+
+ on = js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}')
+ self.assertEqual(json.loads(on), {'abc': 'def'})
+
+ on = js_to_json('{ 0: /* " \n */ ",]" , }')
+ self.assertEqual(json.loads(on), {'0': ',]'})
+
+ on = js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }')
+ self.assertEqual(json.loads(on), {'0': ',]'})
+
+ on = js_to_json('{ 0: // comment\n1 }')
+ self.assertEqual(json.loads(on), {'0': 1})
+
+ on = js_to_json(r'["<p>x<\/p>"]')
+ self.assertEqual(json.loads(on), ['<p>x</p>'])
+
+ on = js_to_json(r'["\xaa"]')
+ self.assertEqual(json.loads(on), ['\u00aa'])
+
+ on = js_to_json("['a\\\nb']")
+ self.assertEqual(json.loads(on), ['ab'])
+
+ on = js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/")
+ self.assertEqual(json.loads(on), ['ab'])
+
+ on = js_to_json('{0xff:0xff}')
+ self.assertEqual(json.loads(on), {'255': 255})
+
+ on = js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}')
+ self.assertEqual(json.loads(on), {'255': 255})
+
+ on = js_to_json('{077:077}')
+ self.assertEqual(json.loads(on), {'63': 63})
+
+ on = js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}')
+ self.assertEqual(json.loads(on), {'63': 63})
+
+ on = js_to_json('{42:42}')
+ self.assertEqual(json.loads(on), {'42': 42})
+
+ on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}')
+ self.assertEqual(json.loads(on), {'42': 42})
+
+ on = js_to_json('{42:4.2e1}')
+ self.assertEqual(json.loads(on), {'42': 42.0})
+
+ on = js_to_json('{ "0x40": "0x40" }')
+ self.assertEqual(json.loads(on), {'0x40': '0x40'})
+
+ on = js_to_json('{ "040": "040" }')
+ self.assertEqual(json.loads(on), {'040': '040'})
+
+ def test_js_to_json_malformed(self):
+ self.assertEqual(js_to_json('42a1'), '42"a1"')
+ self.assertEqual(js_to_json('42a-1'), '42"a"-1')
+
+ def test_extract_attributes(self):
+ self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
+ self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
+ self.assertEqual(extract_attributes('<e x=y>'), {'x': 'y'})
+ self.assertEqual(extract_attributes('<e x="a \'b\' c">'), {'x': "a 'b' c"})
+ self.assertEqual(extract_attributes('<e x=\'a "b" c\'>'), {'x': 'a "b" c'})
+ self.assertEqual(extract_attributes('<e x="&#121;">'), {'x': 'y'})
+ self.assertEqual(extract_attributes('<e x="&#x79;">'), {'x': 'y'})
+ self.assertEqual(extract_attributes('<e x="&amp;">'), {'x': '&'}) # XML
+ self.assertEqual(extract_attributes('<e x="&quot;">'), {'x': '"'})
+ self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'}) # HTML 3.2
+ self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'}) # HTML 4.0
+ self.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'})
+ self.assertEqual(extract_attributes('<e x="\'">'), {'x': "'"})
+ self.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'})
+ self.assertEqual(extract_attributes('<e x >'), {'x': None})
+ self.assertEqual(extract_attributes('<e x=y a>'), {'x': 'y', 'a': None})
+ self.assertEqual(extract_attributes('<e x= y>'), {'x': 'y'})
+ self.assertEqual(extract_attributes('<e x=1 y=2 x=3>'), {'y': '2', 'x': '3'})
+ self.assertEqual(extract_attributes('<e \nx=\ny\n>'), {'x': 'y'})
+ self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'})
+ self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'})
+ self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'})
+ self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'}) # Names lowercased
+ self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'})
+ self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'})
+ self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'})
+ self.assertEqual(extract_attributes('<e x="Fáilte 世界 \U0001f600">'), {'x': 'Fáilte 世界 \U0001f600'})
+ self.assertEqual(extract_attributes('<e x="décompose&#769;">'), {'x': 'décompose\u0301'})
+ # "Narrow" Python builds don't support unicode code points outside BMP.
+ try:
+ compat_chr(0x10000)
+ supports_outside_bmp = True
+ except ValueError:
+ supports_outside_bmp = False
+ if supports_outside_bmp:
+ self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
+ # Malformed HTML should not break attributes extraction on older Python
+ self.assertEqual(extract_attributes('<mal"formed/>'), {})
+
+ def test_clean_html(self):
+ self.assertEqual(clean_html('a:\nb'), 'a: b')
+ self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
+ self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
+
+ def test_intlist_to_bytes(self):
+ self.assertEqual(
+ intlist_to_bytes([0, 1, 127, 128, 255]),
+ b'\x00\x01\x7f\x80\xff')
+
+ def test_args_to_str(self):
+ self.assertEqual(
+ args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
+ 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""'
+ )
+
+ def test_parse_filesize(self):
+ self.assertEqual(parse_filesize(None), None)
+ self.assertEqual(parse_filesize(''), None)
+ self.assertEqual(parse_filesize('91 B'), 91)
+ self.assertEqual(parse_filesize('foobar'), None)
+ self.assertEqual(parse_filesize('2 MiB'), 2097152)
+ self.assertEqual(parse_filesize('5 GB'), 5000000000)
+ self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
+ self.assertEqual(parse_filesize('1.2tb'), 1200000000000)
+ self.assertEqual(parse_filesize('1,24 KB'), 1240)
+ self.assertEqual(parse_filesize('1,24 kb'), 1240)
+ self.assertEqual(parse_filesize('8.5 megabytes'), 8500000)
+
+ def test_parse_count(self):
+ self.assertEqual(parse_count(None), None)
+ self.assertEqual(parse_count(''), None)
+ self.assertEqual(parse_count('0'), 0)
+ self.assertEqual(parse_count('1000'), 1000)
+ self.assertEqual(parse_count('1.000'), 1000)
+ self.assertEqual(parse_count('1.1k'), 1100)
+ self.assertEqual(parse_count('1.1kk'), 1100000)
+ self.assertEqual(parse_count('1.1kk '), 1100000)
+ self.assertEqual(parse_count('1.1kk views'), 1100000)
+
+ def test_parse_resolution(self):
+ self.assertEqual(parse_resolution(None), {})
+ self.assertEqual(parse_resolution(''), {})
+ self.assertEqual(parse_resolution('1920x1080'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution('1920×1080'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution('720p'), {'height': 720})
+ self.assertEqual(parse_resolution('4k'), {'height': 2160})
+ self.assertEqual(parse_resolution('8K'), {'height': 4320})
+
+ def test_parse_bitrate(self):
+ self.assertEqual(parse_bitrate(None), None)
+ self.assertEqual(parse_bitrate(''), None)
+ self.assertEqual(parse_bitrate('300kbps'), 300)
+ self.assertEqual(parse_bitrate('1500kbps'), 1500)
+ self.assertEqual(parse_bitrate('300 kbps'), 300)
+
+ def test_version_tuple(self):
+ self.assertEqual(version_tuple('1'), (1,))
+ self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
+ self.assertEqual(version_tuple('10.1-6'), (10, 1, 6)) # avconv style
+
+ def test_detect_exe_version(self):
+ self.assertEqual(detect_exe_version('''ffmpeg version 1.2.1
+built on May 27 2013 08:37:26 with gcc 4.7 (Debian 4.7.3-4)
+configuration: --prefix=/usr --extra-'''), '1.2.1')
+ self.assertEqual(detect_exe_version('''ffmpeg version N-63176-g1fb4685
+built on May 15 2014 22:09:06 with gcc 4.8.2 (GCC)'''), 'N-63176-g1fb4685')
+ self.assertEqual(detect_exe_version('''X server found. dri2 connection failed!
+Trying to open render node...
+Success at /dev/dri/renderD128.
+ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
+
+ def test_age_restricted(self):
+ self.assertFalse(age_restricted(None, 10)) # unrestricted content
+ self.assertFalse(age_restricted(1, None)) # unrestricted policy
+ self.assertFalse(age_restricted(8, 10))
+ self.assertTrue(age_restricted(18, 14))
+ self.assertFalse(age_restricted(18, 18))
+
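+    # is_html sniffs the leading bytes, decoding past UTF-8/16/32 BOMs before
+    # looking for markup, so binary payloads are not mistaken for pages.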
+ def test_is_html(self):
+ self.assertFalse(is_html(b'\x49\x44\x43<html'))
+ self.assertTrue(is_html(b'<!DOCTYPE foo>\xaaa'))
+ self.assertTrue(is_html( # UTF-8 with BOM
+ b'\xef\xbb\xbf<!DOCTYPE foo>\xaaa'))
+ self.assertTrue(is_html( # UTF-16-LE
+ b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00'
+ ))
+ self.assertTrue(is_html( # UTF-16-BE
+ b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4'
+ ))
+ self.assertTrue(is_html( # UTF-32-BE
+ b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4'))
+ self.assertTrue(is_html( # UTF-32-LE
+ b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00'))
+
+ def test_render_table(self):
+ self.assertEqual(
+ render_table(
+ ['a', 'bcd'],
+ [[123, 4], [9999, 51]]),
+            'a    bcd\n'
+            '123  4\n'
+            '9999 51')
+
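+    # match_str evaluates the --match-filter mini-language: '&'-joined clauses,
+    # numeric comparisons with K/M-style suffixes, string '='/'!=' tests, bare
+    # keys for presence/truthiness, and '?' comparisons that pass when the
+    # field is missing.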
+ def test_match_str(self):
+ self.assertRaises(ValueError, match_str, 'xy>foobar', {})
+ self.assertFalse(match_str('xy', {'x': 1200}))
+ self.assertTrue(match_str('!xy', {'x': 1200}))
+ self.assertTrue(match_str('x', {'x': 1200}))
+ self.assertFalse(match_str('!x', {'x': 1200}))
+ self.assertTrue(match_str('x', {'x': 0}))
+ self.assertFalse(match_str('x>0', {'x': 0}))
+ self.assertFalse(match_str('x>0', {}))
+ self.assertTrue(match_str('x>?0', {}))
+ self.assertTrue(match_str('x>1K', {'x': 1200}))
+ self.assertFalse(match_str('x>2K', {'x': 1200}))
+ self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
+ self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))
+ self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
+ self.assertFalse(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 90, 'description': 'foo'}))
+ self.assertTrue(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 190, 'description': 'foo'}))
+ self.assertFalse(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 190, 'dislike_count': 60, 'description': 'foo'}))
+ self.assertFalse(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 190, 'dislike_count': 10}))
+ self.assertTrue(match_str('is_live', {'is_live': True}))
+ self.assertFalse(match_str('is_live', {'is_live': False}))
+ self.assertFalse(match_str('is_live', {'is_live': None}))
+ self.assertFalse(match_str('is_live', {}))
+ self.assertFalse(match_str('!is_live', {'is_live': True}))
+ self.assertTrue(match_str('!is_live', {'is_live': False}))
+ self.assertTrue(match_str('!is_live', {'is_live': None}))
+ self.assertTrue(match_str('!is_live', {}))
+ self.assertTrue(match_str('title', {'title': 'abc'}))
+ self.assertTrue(match_str('title', {'title': ''}))
+ self.assertFalse(match_str('!title', {'title': 'abc'}))
+ self.assertFalse(match_str('!title', {'title': ''}))
+
+ def test_parse_dfxp_time_expr(self):
+ self.assertEqual(parse_dfxp_time_expr(None), None)
+ self.assertEqual(parse_dfxp_time_expr(''), None)
+ self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1)
+ self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1)
+ self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0)
+ self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1)
+ self.assertEqual(parse_dfxp_time_expr('00:00:01:100'), 1.1)
+
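+    # dfxp2srt converts TTML/DFXP subtitles to SRT: cues are numbered and
+    # timestamps reformatted, <br/> becomes a newline, entries with invalid
+    # or negative timings are dropped, and basic styling maps to font/b/i/u.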
+ def test_dfxp2srt(self):
+ dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
+ <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+ <body>
+ <div xml:lang="en">
+ <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
+ <p begin="1" end="2">第二行<br/>♪♪</p>
+ <p begin="2" dur="1"><span>Third<br/>Line</span></p>
+ <p begin="3" end="-1">Lines with invalid timestamps are ignored</p>
+ <p begin="-1" end="-1">Ignore, two</p>
+ <p begin="3" dur="-1">Ignored, three</p>
+ </div>
+ </body>
+ </tt>'''.encode('utf-8')
+ srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+The following line contains Chinese characters and special symbols
+
+2
+00:00:01,000 --> 00:00:02,000
+第二行
+♪♪
+
+3
+00:00:02,000 --> 00:00:03,000
+Third
+Line
+
+'''
+ self.assertEqual(dfxp2srt(dfxp_data), srt_data)
+
+ dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?>
+ <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+ <body>
+ <div xml:lang="en">
+ <p begin="0" end="1">The first line</p>
+ </div>
+ </body>
+ </tt>'''.encode('utf-8')
+ srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+The first line
+
+'''
+ self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
+
+ dfxp_data_with_style = '''<?xml version="1.0" encoding="utf-8"?>
+<tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata">
+ <head>
+ <styling>
+ <style id="s2" style="s0" tts:color="cyan" tts:fontWeight="bold" />
+ <style id="s1" style="s0" tts:color="yellow" tts:fontStyle="italic" />
+ <style id="s3" style="s0" tts:color="lime" tts:textDecoration="underline" />
+ <style id="s0" tts:backgroundColor="black" tts:fontStyle="normal" tts:fontSize="16" tts:fontFamily="sansSerif" tts:color="white" />
+ </styling>
+ </head>
+ <body tts:textAlign="center" style="s0">
+ <div>
+ <p begin="00:00:02.08" id="p0" end="00:00:05.84">default style<span tts:color="red">custom style</span></p>
+ <p style="s2" begin="00:00:02.08" id="p0" end="00:00:05.84"><span tts:color="lime">part 1<br /></span><span tts:color="cyan">part 2</span></p>
+ <p style="s3" begin="00:00:05.84" id="p1" end="00:00:09.56">line 3<br />part 3</p>
+ <p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
+ </div>
+ </body>
+</tt>'''.encode('utf-8')
+ srt_data = '''1
+00:00:02,080 --> 00:00:05,839
+<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
+
+2
+00:00:02,080 --> 00:00:05,839
+<b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1
+</font>part 2</font></b>
+
+3
+00:00:05,839 --> 00:00:09,560
+<u><font color="lime">line 3
+part 3</font></u>
+
+4
+00:00:09,560 --> 00:00:12,359
+<i><u><font color="yellow"><font color="lime">inner
+ </font>style</font></u></i>
+
+'''
+ self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
+
+ dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?>
+ <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+ <body>
+ <div xml:lang="en">
+ <p begin="0" end="1">Line 1</p>
+ <p begin="1" end="2">第二行</p>
+ </div>
+ </body>
+ </tt>'''.encode('utf-16')
+ srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+Line 1
+
+2
+00:00:01,000 --> 00:00:02,000
+第二行
+
+'''
+ self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data)
+
+ def test_cli_option(self):
+ self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
+ self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
+ self.assertEqual(cli_option({}, '--proxy', 'proxy'), [])
+ self.assertEqual(cli_option({'retries': 10}, '--retries', 'retries'), ['--retries', '10'])
+
+ def test_cli_valueless_option(self):
+ self.assertEqual(cli_valueless_option(
+ {'downloader': 'external'}, '--external-downloader', 'downloader', 'external'), ['--external-downloader'])
+ self.assertEqual(cli_valueless_option(
+ {'downloader': 'internal'}, '--external-downloader', 'downloader', 'external'), [])
+ self.assertEqual(cli_valueless_option(
+ {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), ['--no-check-certificate'])
+ self.assertEqual(cli_valueless_option(
+ {'nocheckcertificate': False}, '--no-check-certificate', 'nocheckcertificate'), [])
+ self.assertEqual(cli_valueless_option(
+ {'checkcertificate': True}, '--no-check-certificate', 'checkcertificate', False), [])
+ self.assertEqual(cli_valueless_option(
+ {'checkcertificate': False}, '--no-check-certificate', 'checkcertificate', False), ['--no-check-certificate'])
+
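+    # cli_bool_option renders a boolean param as '<flag> true/false' (or as
+    # '<flag>=...' when a separator is given); passing an alternative flag
+    # with explicit true/false strings inverts the mapping, and a missing
+    # param yields no arguments at all.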
+ def test_cli_bool_option(self):
+ self.assertEqual(
+ cli_bool_option(
+ {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'),
+ ['--no-check-certificate', 'true'])
+ self.assertEqual(
+ cli_bool_option(
+ {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate', separator='='),
+ ['--no-check-certificate=true'])
+ self.assertEqual(
+ cli_bool_option(
+ {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true'),
+ ['--check-certificate', 'false'])
+ self.assertEqual(
+ cli_bool_option(
+ {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+ ['--check-certificate=false'])
+ self.assertEqual(
+ cli_bool_option(
+ {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true'),
+ ['--check-certificate', 'true'])
+ self.assertEqual(
+ cli_bool_option(
+ {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+ ['--check-certificate=true'])
+ self.assertEqual(
+ cli_bool_option(
+ {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+ [])
+
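+    # ohdave_rsa_encrypt performs textbook RSA, message ** e mod N over the
+    # raw bytes, and returns the result as a lowercase hex string.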
+ def test_ohdave_rsa_encrypt(self):
+ N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
+ e = 65537
+
+ self.assertEqual(
+ ohdave_rsa_encrypt(b'aa111222', e, N),
+ '726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881')
+
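+    # pkcs1pad implements PKCS#1 v1.5 type-2 padding: a [0, 2] prefix,
+    # pseudo-random filler, a 0 separator, then the data; a block too small
+    # for the data plus 11 bytes of overhead raises ValueError.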
+ def test_pkcs1pad(self):
+ data = [1, 2, 3]
+ padded_data = pkcs1pad(data, 32)
+ self.assertEqual(padded_data[:2], [0, 2])
+ self.assertEqual(padded_data[28:], [0, 1, 2, 3])
+
+ self.assertRaises(ValueError, pkcs1pad, data, 8)
+
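+    # encode_base_n renders an integer in an arbitrary base using its default
+    # digit table (0-9, a-z, A-Z) or a caller-supplied one; a base larger
+    # than the table raises ValueError.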
+ def test_encode_base_n(self):
+ self.assertEqual(encode_base_n(0, 30), '0')
+ self.assertEqual(encode_base_n(80, 30), '2k')
+
+ custom_table = '9876543210ZYXWVUTSRQPONMLKJIHGFEDCBA'
+ self.assertEqual(encode_base_n(0, 30, custom_table), '9')
+ self.assertEqual(encode_base_n(80, 30, custom_table), '7P')
+
+ self.assertRaises(ValueError, encode_base_n, 0, 70)
+ self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
+
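+    # caesar shifts each character by the given offset within the supplied
+    # alphabet, wrapping around; characters outside the alphabet pass through
+    # untouched (hence 'xyz' survives an 'abcdef'-alphabet shift intact).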
+ def test_caesar(self):
+ self.assertEqual(caesar('ace', 'abcdef', 2), 'cea')
+ self.assertEqual(caesar('cea', 'abcdef', -2), 'ace')
+ self.assertEqual(caesar('ace', 'abcdef', -2), 'eac')
+ self.assertEqual(caesar('eac', 'abcdef', 2), 'ace')
+ self.assertEqual(caesar('ace', 'abcdef', 0), 'ace')
+ self.assertEqual(caesar('xyz', 'abcdef', 2), 'xyz')
+ self.assertEqual(caesar('abc', 'acegik', 2), 'ebg')
+ self.assertEqual(caesar('ebg', 'acegik', -2), 'abc')
+
+ def test_rot47(self):
+        self.assertEqual(rot47('hypervideo'), '9JA6CG:56@')
+        self.assertEqual(rot47('HYPERVIDEO'), "w*!t#'xst~")
+
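+    # urshift emulates JavaScript's unsigned right shift (>>>) on 32-bit
+    # values, which is why -3 shifted by 1 yields 2147483646 rather than -2.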
+ def test_urshift(self):
+ self.assertEqual(urshift(3, 1), 1)
+ self.assertEqual(urshift(-3, 1), 2147483646)
+
+ def test_get_element_by_class(self):
+ html = '''
+ <span class="foo bar">nice</span>
+ '''
+
+ self.assertEqual(get_element_by_class('foo', html), 'nice')
+ self.assertEqual(get_element_by_class('no-such-class', html), None)
+
+ def test_get_element_by_attribute(self):
+ html = '''
+ <span class="foo bar">nice</span>
+ '''
+
+ self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice')
+ self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
+ self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
+
+ html = '''
+ <div itemprop="author" itemscope>foo</div>
+ '''
+
+ self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo')
+
+ def test_get_elements_by_class(self):
+ html = '''
+ <span class="foo bar">nice</span><span class="foo bar">also nice</span>
+ '''
+
+ self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
+ self.assertEqual(get_elements_by_class('no-such-class', html), [])
+
+ def test_get_elements_by_attribute(self):
+ html = '''
+ <span class="foo bar">nice</span><span class="foo bar">also nice</span>
+ '''
+
+ self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
+ self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
+ self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
+
+ def test_clean_podcast_url(self):
+ self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
+ self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py
new file mode 100644
index 0000000..aaeb350
--- /dev/null
+++ b/test/test_verbose_output.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import unittest
+
+import sys
+import os
+import subprocess
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+class TestVerboseOutput(unittest.TestCase):
+ def test_private_info_arg(self):
+ outp = subprocess.Popen(
+ [
+ sys.executable, 'hypervideo_dl/__main__.py', '-v',
+ '--username', 'johnsmith@gmail.com',
+ '--password', 'secret',
+ ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ sout, serr = outp.communicate()
+ self.assertTrue(b'--username' in serr)
+ self.assertTrue(b'johnsmith' not in serr)
+ self.assertTrue(b'--password' in serr)
+ self.assertTrue(b'secret' not in serr)
+
+ def test_private_info_shortarg(self):
+ outp = subprocess.Popen(
+ [
+ sys.executable, 'hypervideo_dl/__main__.py', '-v',
+ '-u', 'johnsmith@gmail.com',
+ '-p', 'secret',
+ ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ sout, serr = outp.communicate()
+ self.assertTrue(b'-u' in serr)
+ self.assertTrue(b'johnsmith' not in serr)
+ self.assertTrue(b'-p' in serr)
+ self.assertTrue(b'secret' not in serr)
+
+ def test_private_info_eq(self):
+ outp = subprocess.Popen(
+ [
+ sys.executable, 'hypervideo_dl/__main__.py', '-v',
+ '--username=johnsmith@gmail.com',
+ '--password=secret',
+ ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ sout, serr = outp.communicate()
+ self.assertTrue(b'--username' in serr)
+ self.assertTrue(b'johnsmith' not in serr)
+ self.assertTrue(b'--password' in serr)
+ self.assertTrue(b'secret' not in serr)
+
+ def test_private_info_shortarg_eq(self):
+ outp = subprocess.Popen(
+ [
+ sys.executable, 'hypervideo_dl/__main__.py', '-v',
+ '-u=johnsmith@gmail.com',
+ '-p=secret',
+ ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ sout, serr = outp.communicate()
+ self.assertTrue(b'-u' in serr)
+ self.assertTrue(b'johnsmith' not in serr)
+ self.assertTrue(b'-p' in serr)
+ self.assertTrue(b'secret' not in serr)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py
new file mode 100644
index 0000000..6f6c7ab
--- /dev/null
+++ b/test/test_write_annotations.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+# coding: utf-8
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import get_params, try_rm
+
+
+import io
+
+import xml.etree.ElementTree
+
+import hypervideo_dl.YoutubeDL
+import hypervideo_dl.extractor
+
+
+class YoutubeDL(hypervideo_dl.YoutubeDL):
+ def __init__(self, *args, **kwargs):
+ super(YoutubeDL, self).__init__(*args, **kwargs)
+ self.to_stderr = self.to_screen
+
+
+params = get_params({
+ 'writeannotations': True,
+ 'skip_download': True,
+ 'writeinfojson': False,
+ 'format': 'flv',
+})
+
+
+TEST_ID = 'gr51aVj-mLg'
+ANNOTATIONS_FILE = TEST_ID + '.annotations.xml'
+EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
+
+
+class TestAnnotations(unittest.TestCase):
+ def setUp(self):
+ # Clear old files
+ self.tearDown()
+
+ def test_info_json(self):
+ expected = list(EXPECTED_ANNOTATIONS) # Two annotations could have the same text.
+ ie = hypervideo_dl.extractor.YoutubeIE()
+ ydl = YoutubeDL(params)
+ ydl.add_info_extractor(ie)
+ ydl.download([TEST_ID])
+ self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
+ annoxml = None
+ with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
+ annoxml = xml.etree.ElementTree.parse(annof)
+ self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
+ root = annoxml.getroot()
+ self.assertEqual(root.tag, 'document')
+ annotationsTag = root.find('annotations')
+ self.assertEqual(annotationsTag.tag, 'annotations')
+ annotations = annotationsTag.findall('annotation')
+
+ # Not all the annotations have TEXT children and the annotations are returned unsorted.
+ for a in annotations:
+ self.assertEqual(a.tag, 'annotation')
+ if a.get('type') == 'text':
+ textTag = a.find('TEXT')
+ text = textTag.text
+ self.assertTrue(text in expected) # assertIn only added in python 2.7
+                # Remove the first occurrence; there could be more than one annotation with the same text
+ expected.remove(text)
+ # We should have seen (and removed) all the expected annotation texts.
+ self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')
+
+ def tearDown(self):
+ try_rm(ANNOTATIONS_FILE)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
new file mode 100644
index 0000000..cecba65
--- /dev/null
+++ b/test/test_youtube_lists.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import FakeYDL
+
+
+from hypervideo_dl.extractor import (
+ YoutubePlaylistIE,
+ YoutubeTabIE,
+ YoutubeIE,
+)
+
+
+class TestYoutubeLists(unittest.TestCase):
+ def assertIsPlaylist(self, info):
+ """Make sure the info has '_type' set to 'playlist'"""
+ self.assertEqual(info['_type'], 'playlist')
+
+ def test_youtube_playlist_noplaylist(self):
+ dl = FakeYDL()
+ dl.params['noplaylist'] = True
+ ie = YoutubePlaylistIE(dl)
+ result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
+ self.assertEqual(result['_type'], 'url')
+ self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
+
+ def test_youtube_course(self):
+ dl = FakeYDL()
+ ie = YoutubePlaylistIE(dl)
+        # TODO: find a course with more than 100 videos to exercise pagination
+ result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+ entries = list(result['entries'])
+ self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
+ self.assertEqual(len(entries), 25)
+ self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
+
+ def test_youtube_mix(self):
+ dl = FakeYDL()
+ ie = YoutubePlaylistIE(dl)
+ result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
+ entries = result['entries']
+ self.assertTrue(len(entries) >= 50)
+ original_video = entries[0]
+ self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
+
+ def test_youtube_toptracks(self):
+ print('Skipping: The playlist page gives error 500')
+ return
+ dl = FakeYDL()
+ ie = YoutubePlaylistIE(dl)
+ result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
+ entries = result['entries']
+ self.assertEqual(len(entries), 100)
+
+ def test_youtube_flat_playlist_extraction(self):
+ dl = FakeYDL()
+ dl.params['extract_flat'] = True
+ ie = YoutubeTabIE(dl)
+ result = ie.extract('https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc')
+ self.assertIsPlaylist(result)
+ entries = list(result['entries'])
+ self.assertTrue(len(entries) == 1)
+ video = entries[0]
+ self.assertEqual(video['_type'], 'url_transparent')
+ self.assertEqual(video['ie_key'], 'Youtube')
+ self.assertEqual(video['id'], 'BaW_jenozKc')
+ self.assertEqual(video['url'], 'BaW_jenozKc')
+        self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐')
+ self.assertEqual(video['duration'], 10)
+ self.assertEqual(video['uploader'], 'Philipp Hagemeister')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_youtube_misc.py b/test/test_youtube_misc.py
new file mode 100644
index 0000000..1739f5d
--- /dev/null
+++ b/test/test_youtube_misc.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from hypervideo_dl.extractor import YoutubeIE
+
+
+class TestYoutubeMisc(unittest.TestCase):
+ def test_youtube_extract(self):
+ assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
+ assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
+ assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/testcert.pem b/test/testcert.pem
new file mode 100644
index 0000000..b3e0f00
--- /dev/null
+++ b/test/testcert.pem
@@ -0,0 +1,52 @@
+-----BEGIN PRIVATE KEY-----
+MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDMF0bAzaHAdIyB
+HRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaUYF1uTcNp
+Qx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQqO6BVg4+h
+A1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8A4CK58Ev
+mMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRhKxUhmw0J
+aobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/Mo83KyiP
+tKMCSQulAgMBAAECggEALCfBDAexPjU5DNoh6bIorUXxIJzxTNzNHCdvgbCGiA54
+BBKPh8s6qwazpnjT6WQWDIg/O5zZufqjE4wM9x4+0Zoqfib742ucJO9wY4way6x4
+Clt0xzbLPabB+MoZ4H7ip+9n2+dImhe7pGdYyOHoNYeOL57BBi1YFW42Hj6u/8pd
+63YCXisto3Rz1YvRQVjwsrS+cRKZlzAFQRviL30jav7Wh1aWEfcXxjj4zhm8pJdk
+ITGtq6howz57M0NtX6hZnfe8ywzTnDFIGKIMA2cYHuYJcBh9bc4tCGubTvTKK9UE
+8fM+f6UbfGqfpKCq1mcgs0XMoFDSzKS9+mSJn0+5JQKBgQD+OCKaeH3Yzw5zGnlw
+XuQfMJGNcgNr+ImjmvzUAC2fAZUJLAcQueE5kzMv5Fmd+EFE2CEX1Vit3tg0SXvA
+G+bq609doILHMA03JHnV1npO/YNIhG3AAtJlKYGxQNfWH9mflYj9mEui8ZFxG52o
+zWhHYuifOjjZszUR+/eio6NPzwKBgQDNhUBTrT8LIX4SE/EFUiTlYmWIvOMgXYvN
+8Cm3IRNQ/yyphZaXEU0eJzfX5uCDfSVOgd6YM/2pRah+t+1Hvey4H8e0GVTu5wMP
+gkkqwKPGIR1YOmlw6ippqwvoJD7LuYrm6Q4D6e1PvkjwCq6lEndrOPmPrrXNd0JJ
+XO60y3U2SwKBgQDLkyZarryQXxcCI6Q10Tc6pskYDMIit095PUbTeiUOXNT9GE28
+Hi32ziLCakk9kCysNasii81MxtQ54tJ/f5iGbNMMddnkKl2a19Hc5LjjAm4cJzg/
+98KGEhvyVqvAo5bBDZ06/rcrD+lZOzUglQS5jcIcqCIYa0LHWQ/wJLxFzwKBgFcZ
+1SRhdSmDfUmuF+S4ZpistflYjC3IV5rk4NkS9HvMWaJS0nqdw4A3AMzItXgkjq4S
+DkOVLTkTI5Do5HAWRv/VwC5M2hkR4NMu1VGAKSisGiKtRsirBWSZMEenLNHshbjN
+Jrpz5rZ4H7NT46ZkCCZyFBpX4gb9NyOedjA7Via3AoGARF8RxbYjnEGGFuhnbrJB
+FTPR0vaL4faY3lOgRZ8jOG9V2c9Hzi/y8a8TU4C11jnJSDqYCXBTd5XN28npYxtD
+pjRsCwy6ze+yvYXPO7C978eMG3YRyj366NXUxnXN59ibwe/lxi2OD9z8J1LEdF6z
+VJua1Wn8HKxnXMI61DhTCSo=
+-----END PRIVATE KEY-----
+-----BEGIN CERTIFICATE-----
+MIIEEzCCAvugAwIBAgIJAK1haYi6gmSKMA0GCSqGSIb3DQEBCwUAMIGeMQswCQYD
+VQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEbMBkG
+A1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRsIHRl
+c3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhhZ0Bw
+aGloYWcuZGUwIBcNMTUwMTMwMDExNTA4WhgPMjExNTAxMDYwMTE1MDhaMIGeMQsw
+CQYDVQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEb
+MBkGA1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRs
+IHRlc3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhh
+Z0BwaGloYWcuZGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDMF0bA
+zaHAdIyBHRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaU
+YF1uTcNpQx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQq
+O6BVg4+hA1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8
+A4CK58EvmMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRh
+KxUhmw0JaobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/
+Mo83KyiPtKMCSQulAgMBAAGjUDBOMB0GA1UdDgQWBBTBUZoqhQkzHQ6xNgZfFxOd
+ZEVt8TAfBgNVHSMEGDAWgBTBUZoqhQkzHQ6xNgZfFxOdZEVt8TAMBgNVHRMEBTAD
+AQH/MA0GCSqGSIb3DQEBCwUAA4IBAQCUOCl3T/J9B08Z+ijfOJAtkbUaEHuVZb4x
+5EpZSy2ZbkLvtsftMFieHVNXn9dDswQc5qjYStCC4o60LKw4M6Y63FRsAZ/DNaqb
+PY3jyCyuugZ8/sNf50vHYkAcF7SQYqOQFQX4TQsNUk2xMJIt7H0ErQFmkf/u3dg6
+cy89zkT462IwxzSG7NNhIlRkL9o5qg+Y1mF9eZA1B0rcL6hO24PPTHOd90HDChBu
+SZ6XMi/LzYQSTf0Vg2R+uMIVlzSlkdcZ6sqVnnqeLL8dFyIa4e9sj/D4ZCYP8Mqe
+Z73H5/NNhmwCHRqVUTgm307xblQaWGhwAiDkaRvRW2aJQ0qGEdZK
+-----END CERTIFICATE-----
diff --git a/test/testdata/cookies/httponly_cookies.txt b/test/testdata/cookies/httponly_cookies.txt
new file mode 100644
index 0000000..c46541d
--- /dev/null
+++ b/test/testdata/cookies/httponly_cookies.txt
@@ -0,0 +1,6 @@
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+
+#HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE
+www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE
diff --git a/test/testdata/cookies/malformed_cookies.txt b/test/testdata/cookies/malformed_cookies.txt
new file mode 100644
index 0000000..17bc403
--- /dev/null
+++ b/test/testdata/cookies/malformed_cookies.txt
@@ -0,0 +1,9 @@
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+
+# Cookie file entry with invalid number of fields - 6 instead of 7
+www.foobar.foobar FALSE / FALSE 0 COOKIE
+
+# Cookie file entry with invalid expires at
+www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE
diff --git a/test/testdata/cookies/session_cookies.txt b/test/testdata/cookies/session_cookies.txt
new file mode 100644
index 0000000..f6996f0
--- /dev/null
+++ b/test/testdata/cookies/session_cookies.txt
@@ -0,0 +1,6 @@
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+
+www.foobar.foobar FALSE / TRUE YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue
+www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value
diff --git a/test/testdata/f4m/custom_base_url.f4m b/test/testdata/f4m/custom_base_url.f4m
new file mode 100644
index 0000000..74e1539
--- /dev/null
+++ b/test/testdata/f4m/custom_base_url.f4m
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<manifest xmlns="http://ns.adobe.com/f4m/1.0">
+ <streamType>recorded</streamType>
+ <baseURL>http://vod.livestream.com/events/0000000000673980/</baseURL>
+ <duration>269.293</duration>
+ <bootstrapInfo profile="named" id="bootstrap_1">AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA=</bootstrapInfo>
+ <media url="b90f532f-b0f6-4f4e-8289-706d490b2fd8_2292" bootstrapInfoId="bootstrap_1" bitrate="2148" width="1280" height="720" videoCodec="avc1.4d401f" audioCodec="mp4a.40.2">
+ <metadata>AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ==</metadata>
+ </media>
+</manifest>
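
The case this manifest covers is the explicit <baseURL>: the relative @url of each <media> element must be resolved against it rather than against the manifest's own location. A stdlib sketch of that resolution (function and parameter names are mine):

    import xml.etree.ElementTree as ET
    from urllib.parse import urljoin

    F4M_NS = '{http://ns.adobe.com/f4m/1.0}'

    def f4m_media_urls(manifest_path, manifest_url):
        root = ET.parse(manifest_path).getroot()
        # <baseURL> wins when present; fall back to the manifest URL.
        base = root.findtext(F4M_NS + 'baseURL') or manifest_url
        return [urljoin(base, m.get('url'))
                for m in root.findall(F4M_NS + 'media')]
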
diff --git a/test/testdata/m3u8/pluzz_francetv_11507.m3u8 b/test/testdata/m3u8/pluzz_francetv_11507.m3u8
new file mode 100644
index 0000000..0809f5a
--- /dev/null
+++ b/test/testdata/m3u8/pluzz_francetv_11507.m3u8
@@ -0,0 +1,14 @@
+#EXTM3U
+ #EXT-X-VERSION:5
+ #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Francais",DEFAULT=NO,FORCED=NO,URI="http://replayftv-pmd.francetv.fr/subtitles/2017/16/156589847-1492488987.m3u8",LANGUAGE="fra"
+ #EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="fra",NAME="Francais",DEFAULT=YES, AUTOSELECT=YES
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=180000,RESOLUTION=256x144,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=303000,RESOLUTION=320x180,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=575000,RESOLUTION=512x288,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=831000,RESOLUTION=704x396,CODECS="avc1.77.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=1467000,RESOLUTION=1024x576,CODECS="avc1.77.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0
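
A master playlist pairs each #EXT-X-STREAM-INF tag with the variant URI on the following line; note the stray leading whitespace on some tag lines above, which a robust parser should tolerate. A rough sketch of the attribute parsing (not the project's HLS code):

    import re

    def parse_master_playlist(text):
        variants, last_inf = [], None
        for line in text.splitlines():
            line = line.strip()  # tolerate the indented tags seen above
            if line.startswith('#EXT-X-STREAM-INF:'):
                last_inf = dict(re.findall(
                    r'([A-Z0-9-]+)=("[^"]*"|[^",]+)', line.partition(':')[2]))
            elif line and not line.startswith('#') and last_inf is not None:
                variants.append((last_inf, line))  # (attributes, URI)
                last_inf = None
        return variants
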
diff --git a/test/testdata/m3u8/teamcoco_11995.m3u8 b/test/testdata/m3u8/teamcoco_11995.m3u8
new file mode 100644
index 0000000..a6e4216
--- /dev/null
+++ b/test/testdata/m3u8/teamcoco_11995.m3u8
@@ -0,0 +1,16 @@
+#EXTM3U
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-0",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8"
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-1",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=37862000,CODECS="avc1.4d001f",URI="hls/CONAN_020217_Highlight_show-2m_iframe.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=18750000,CODECS="avc1.4d001e",URI="hls/CONAN_020217_Highlight_show-1m_iframe.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=6535000,CODECS="avc1.42001e",URI="hls/CONAN_020217_Highlight_show-400k_iframe.m3u8"
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2374000,RESOLUTION=1024x576,CODECS="avc1.4d001f,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-2m_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1205000,RESOLUTION=640x360,CODECS="avc1.4d001e,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-1m_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=522000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-400k_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=413000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.5",AUDIO="audio-1"
+hls/CONAN_020217_Highlight_show-400k_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=71000,CODECS="mp4a.40.5",AUDIO="audio-1"
+hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8
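
Unlike the previous playlist, the audio here sits in separate renditions: each variant's AUDIO attribute names an #EXT-X-MEDIA group whose URI is the matching audio playlist (note how the 413k variant switches to the 64k group). Resolving it is a plain lookup, assuming attribute dicts with quotes already stripped:

    def audio_uri(media_tags, group_id):
        # media_tags: attribute dicts parsed from #EXT-X-MEDIA lines.
        for attrs in media_tags:
            if attrs.get('TYPE') == 'AUDIO' and attrs.get('GROUP-ID') == group_id:
                return attrs.get('URI')
        return None
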
diff --git a/test/testdata/m3u8/ted_18923.m3u8 b/test/testdata/m3u8/ted_18923.m3u8
new file mode 100644
index 0000000..52a2711
--- /dev/null
+++ b/test/testdata/m3u8/ted_18923.m3u8
@@ -0,0 +1,28 @@
+#EXTM3U
+#EXT-X-VERSION:4
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1255659,PROGRAM-ID=1,CODECS="avc1.42c01e,mp4a.40.2",RESOLUTION=640x360
+/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=163154,PROGRAM-ID=1,CODECS="avc1.42c00c,mp4a.40.2",RESOLUTION=320x180
+/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=481701,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
+/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=769968,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
+/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=984037,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
+/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1693925,PROGRAM-ID=1,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=853x480
+/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=2462469,PROGRAM-ID=1,CODECS="avc1.640028,mp4a.40.2",RESOLUTION=1280x720
+/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=68101,PROGRAM-ID=1,CODECS="mp4a.40.2",DEFAULT=YES
+/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b
+
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=74298,PROGRAM-ID=1,CODECS="avc1.42c00c",RESOLUTION=320x180,URI="/videos/BorisHesser_2018S/video/64k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=216200,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/180k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=304717,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/320k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=350933,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/450k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=495850,PROGRAM-ID=1,CODECS="avc1.42c01e",RESOLUTION=640x360,URI="/videos/BorisHesser_2018S/video/600k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=810750,PROGRAM-ID=1,CODECS="avc1.4d401f",RESOLUTION=853x480,URI="/videos/BorisHesser_2018S/video/950k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=1273700,PROGRAM-ID=1,CODECS="avc1.640028",RESOLUTION=1280x720,URI="/videos/BorisHesser_2018S/video/1500k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="600k",LANGUAGE="en",NAME="Audio",AUTOSELECT=YES,DEFAULT=YES,URI="/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b",BANDWIDTH=614400
diff --git a/test/testdata/m3u8/toggle_mobile_12211.m3u8 b/test/testdata/m3u8/toggle_mobile_12211.m3u8
new file mode 100644
index 0000000..69604e6
--- /dev/null
+++ b/test/testdata/m3u8/toggle_mobile_12211.m3u8
@@ -0,0 +1,13 @@
+#EXTM3U
+#EXT-X-VERSION:4
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="eng",NAME="English",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8"
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="und",NAME="Undefined",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8"
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=155648,RESOLUTION=320x180,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=502784,RESOLUTION=480x270,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=827392,RESOLUTION=640x360,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1396736,RESOLUTION=854x480,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8
diff --git a/test/testdata/m3u8/twitch_vod.m3u8 b/test/testdata/m3u8/twitch_vod.m3u8
new file mode 100644
index 0000000..7617277
--- /dev/null
+++ b/test/testdata/m3u8/twitch_vod.m3u8
@@ -0,0 +1,20 @@
+#EXTM3U
+#EXT-X-TWITCH-INFO:ORIGIN="s3",CLUSTER="edgecast_vod",REGION="EU",MANIFEST-CLUSTER="edgecast_vod",USER-IP="109.171.17.81"
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="chunked",NAME="Source",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=3214134,CODECS="avc1.100.31,mp4a.40.2",RESOLUTION="1280x720",VIDEO="chunked"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="high",NAME="High",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1603789,CODECS="avc1.42C01F,mp4a.40.2",RESOLUTION="1280x720",VIDEO="high"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="medium",NAME="Medium",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=893387,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="852x480",VIDEO="medium"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="low",NAME="Low",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=628347,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="640x360",VIDEO="low"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="mobile",NAME="Mobile",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=280474,CODECS="avc1.42C00D,mp4a.40.2",RESOLUTION="400x226",VIDEO="mobile"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="audio_only",NAME="Audio Only",AUTOSELECT=NO,DEFAULT=NO
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=182725,CODECS="mp4a.40.2",VIDEO="audio_only"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8
diff --git a/test/testdata/m3u8/vidio.m3u8 b/test/testdata/m3u8/vidio.m3u8
new file mode 100644
index 0000000..89c2444
--- /dev/null
+++ b/test/testdata/m3u8/vidio.m3u8
@@ -0,0 +1,10 @@
+#EXTM3U
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=300000,RESOLUTION=480x270,NAME="270p 3G"
+https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=600000,RESOLUTION=640x360,NAME="360p SD"
+https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1200000,RESOLUTION=1280x720,NAME="720p HD"
+https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8
diff --git a/test/testdata/mpd/float_duration.mpd b/test/testdata/mpd/float_duration.mpd
new file mode 100644
index 0000000..8dc1d2d
--- /dev/null
+++ b/test/testdata/mpd/float_duration.mpd
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<MPD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:mpeg:dash:schema:mpd:2011" type="static" minBufferTime="PT2S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT6014S">
+ <Period bitstreamSwitching="true">
+ <AdaptationSet mimeType="audio/mp4" codecs="mp4a.40.2" startWithSAP="1" segmentAlignment="true">
+ <SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="ai_$RepresentationID$.mp4d" media="a_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
+ <Representation id="318597" bandwidth="61587"></Representation>
+ </AdaptationSet>
+ <AdaptationSet mimeType="video/mp4" startWithSAP="1" segmentAlignment="true">
+ <SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="vi_$RepresentationID$.mp4d" media="v_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
+ <Representation id="318597" codecs="avc1.42001f" width="340" height="192" bandwidth="318597"></Representation>
+ <Representation id="638590" codecs="avc1.42001f" width="512" height="288" bandwidth="638590"></Representation>
+ <Representation id="1022565" codecs="avc1.4d001f" width="688" height="384" bandwidth="1022565"></Representation>
+ <Representation id="2046506" codecs="avc1.4d001f" width="1024" height="576" bandwidth="2046506"></Representation>
+ <Representation id="3998017" codecs="avc1.640029" width="1280" height="720" bandwidth="3998017"></Representation>
+ <Representation id="5997485" codecs="avc1.640032" width="1920" height="1080" bandwidth="5997485"></Representation>
+ </AdaptationSet>
+ </Period>
+</MPD>
\ No newline at end of file
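
What this manifest exercises is the float @duration: each segment lasts 2000000.0 / 1000000 = 2.0 s, so the PT6014S presentation comes to 3007 fragments per representation. The arithmetic, which must accept the decimal point:

    import math

    def fragment_count(presentation_secs, duration, timescale):
        # fragment_count(6014, '2000000.0', '1000000') -> 3007
        return math.ceil(presentation_secs / (float(duration) / float(timescale)))
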
diff --git a/test/testdata/mpd/unfragmented.mpd b/test/testdata/mpd/unfragmented.mpd
new file mode 100644
index 0000000..5a3720b
--- /dev/null
+++ b/test/testdata/mpd/unfragmented.mpd
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<MPD mediaPresentationDuration="PT54.915S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
+ <Period duration="PT54.915S">
+ <AdaptationSet segmentAlignment="true" subsegmentAlignment="true" subsegmentStartsWithSAP="1">
+ <Representation bandwidth="804261" codecs="avc1.4d401e" frameRate="30" height="360" id="VIDEO-1" mimeType="video/mp4" startWithSAP="1" width="360">
+ <BaseURL>DASH_360</BaseURL>
+ <SegmentBase indexRange="915-1114" indexRangeExact="true">
+ <Initialization range="0-914"/>
+ </SegmentBase>
+ </Representation>
+ <Representation bandwidth="608000" codecs="avc1.4d401e" frameRate="30" height="240" id="VIDEO-2" mimeType="video/mp4" startWithSAP="1" width="240">
+ <BaseURL>DASH_240</BaseURL>
+ <SegmentBase indexRange="913-1112" indexRangeExact="true">
+ <Initialization range="0-912"/>
+ </SegmentBase>
+ </Representation>
+ </AdaptationSet>
+ <AdaptationSet>
+ <Representation audioSamplingRate="48000" bandwidth="129870" codecs="mp4a.40.2" id="AUDIO-1" mimeType="audio/mp4" startWithSAP="1">
+ <AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
+ <BaseURL>audio</BaseURL>
+ <SegmentBase indexRange="832-1007" indexRangeExact="true">
+ <Initialization range="0-831"/>
+ </SegmentBase>
+ </Representation>
+ </AdaptationSet>
+ </Period>
+</MPD>
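
With SegmentBase addressing, each representation is one unfragmented file: the init section and the sidx segment index sit at fixed byte offsets (bytes 0-914 and 915-1114 for VIDEO-1 above), so a client pulls them with HTTP Range requests. A sketch:

    import urllib.request

    def fetch_range(url, byte_range):
        # e.g. fetch_range(base + 'DASH_360', '0-914') for the init
        # section of VIDEO-1; '915-1114' returns its segment index.
        req = urllib.request.Request(url, headers={'Range': 'bytes=' + byte_range})
        with urllib.request.urlopen(req) as resp:
            return resp.read()
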
diff --git a/test/testdata/mpd/urls_only.mpd b/test/testdata/mpd/urls_only.mpd
new file mode 100644
index 0000000..2b9d595
--- /dev/null
+++ b/test/testdata/mpd/urls_only.mpd
@@ -0,0 +1,218 @@
+<?xml version="1.0" ?>
+<MPD maxSegmentDuration="PT0H0M10.000S" mediaPresentationDuration="PT0H4M1.728S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-main:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
+ <Period duration="PT0H4M1.728S">
+ <AdaptationSet bitstreamSwitching="true" lang="und" maxHeight="1080" maxWidth="1920" par="16:9" segmentAlignment="true">
+ <ContentComponent contentType="video" id="1"/>
+ <Representation audioSamplingRate="44100" bandwidth="200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="144" id="h264_aac_144p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="256">
+ <SegmentList duration="10000" timescale="1000">
+ <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/init/432f65a0.mp4"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/1/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/2/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/3/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/4/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/5/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/6/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/7/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/8/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/9/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/10/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/11/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/12/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/13/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/14/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/15/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/16/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/17/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/18/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/19/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/20/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/21/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/22/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/23/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/24/432f65a0.m4s"/>
+ </SegmentList>
+ </Representation>
+ <Representation audioSamplingRate="44100" bandwidth="400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="240" id="h264_aac_240p_m4s" mimeType="video/mp4" sar="160:159" startWithSAP="1" width="424">
+ <SegmentList duration="10000" timescale="1000">
+ <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/init/432f65a0.mp4"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/0/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/1/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/2/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/3/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/4/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/5/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/6/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/7/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/8/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/9/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/10/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/11/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/12/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/13/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/14/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/15/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/16/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/17/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/18/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/19/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/20/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/21/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/22/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/23/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/24/432f65a0.m4s"/>
+ </SegmentList>
+ </Representation>
+ <Representation audioSamplingRate="44100" bandwidth="800000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="360" id="h264_aac_360p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="640">
+ <SegmentList duration="10000" timescale="1000">
+ <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/init/432f65a0.mp4"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/0/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/1/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/2/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/3/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/4/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/5/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/6/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/7/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/8/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/9/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/10/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/11/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/12/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/13/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/14/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/15/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/16/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/17/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/18/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/19/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/20/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/21/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/22/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/23/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/24/432f65a0.m4s"/>
+ </SegmentList>
+ </Representation>
+ <Representation audioSamplingRate="44100" bandwidth="1200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="480" id="h264_aac_480p_m4s" mimeType="video/mp4" sar="320:321" startWithSAP="1" width="856">
+ <SegmentList duration="10000" timescale="1000">
+ <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/init/432f65a0.mp4"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/0/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/1/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/2/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/3/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/4/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/5/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/6/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/7/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/8/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/9/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/10/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/11/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/12/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/13/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/14/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/15/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/16/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/17/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/18/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/19/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/20/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/21/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/22/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/23/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/24/432f65a0.m4s"/>
+ </SegmentList>
+ </Representation>
+ <Representation audioSamplingRate="44100" bandwidth="1600000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="576" id="h264_aac_576p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1024">
+ <SegmentList duration="10000" timescale="1000">
+ <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/init/432f65a0.mp4"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/0/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/1/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/2/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/3/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/4/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/5/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/6/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/7/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/8/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/9/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/10/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/11/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/12/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/13/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/14/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/15/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/16/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/17/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/18/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/19/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/20/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/21/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/22/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/23/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/24/432f65a0.m4s"/>
+ </SegmentList>
+ </Representation>
+ <Representation audioSamplingRate="44100" bandwidth="2400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="720" id="h264_aac_720p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1280">
+ <SegmentList duration="10000" timescale="1000">
+ <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/init/432f65a0.mp4"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/0/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/1/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/2/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/3/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/4/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/5/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/6/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/7/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/8/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/9/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/10/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/11/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/12/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/13/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/14/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/15/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/16/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/17/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/18/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/19/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/20/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/21/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/22/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/23/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/24/432f65a0.m4s"/>
+ </SegmentList>
+ </Representation>
+ <Representation audioSamplingRate="44100" bandwidth="4400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="1080" id="h264_aac_1080p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1920">
+ <SegmentList duration="10000" timescale="1000">
+ <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/init/432f65a0.mp4"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/0/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/1/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/2/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/3/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/4/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/5/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/6/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/7/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/8/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/9/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/10/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/11/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/12/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/13/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/14/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/15/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/16/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/17/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/18/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/19/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/20/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/21/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/22/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/23/432f65a0.m4s"/>
+ <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/24/432f65a0.m4s"/>
+ </SegmentList>
+ </Representation>
+ </AdaptationSet>
+ </Period>
+</MPD>
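
Here nothing is templated; every fragment is an explicit <SegmentURL>, all relative to the MPD's own location. Collecting one representation's download list is plain tree-walking (a stdlib sketch, namespace as declared above):

    import xml.etree.ElementTree as ET

    MPD_NS = '{urn:mpeg:dash:schema:mpd:2011}'

    def segment_urls(mpd_path, rep_id):
        root = ET.parse(mpd_path).getroot()
        for rep in root.iter(MPD_NS + 'Representation'):
            if rep.get('id') != rep_id:
                continue
            seg_list = rep.find(MPD_NS + 'SegmentList')
            init = seg_list.find(MPD_NS + 'Initialization').get('sourceURL')
            media = [s.get('media') for s in seg_list.findall(MPD_NS + 'SegmentURL')]
            return init, media
        return None
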
diff --git a/test/testdata/xspf/foo_xspf.xspf b/test/testdata/xspf/foo_xspf.xspf
new file mode 100644
index 0000000..b7f0086
--- /dev/null
+++ b/test/testdata/xspf/foo_xspf.xspf
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<playlist version="1" xmlns="http://xspf.org/ns/0/">
+ <date>2018-03-09T18:01:43Z</date>
+ <trackList>
+ <track>
+ <location>cd1/track%201.mp3</location>
+ <title>Pandemonium</title>
+ <creator>Foilverb</creator>
+ <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+ <album>Pandemonium EP</album>
+ <trackNum>1</trackNum>
+ <duration>202416</duration>
+ </track>
+ <track>
+ <location>../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3</location>
+ <title>Final Cartridge (Nichico Twelve Remix)</title>
+ <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+ <creator>Foilverb</creator>
+ <album>Pandemonium EP</album>
+ <trackNum>2</trackNum>
+ <duration>255857</duration>
+ </track>
+ <track>
+ <location>track3.mp3</location>
+ <location>https://example.com/track3.mp3</location>
+ <title>Rebuilding Nightingale</title>
+ <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+ <creator>Foilverb</creator>
+ <album>Pandemonium EP</album>
+ <trackNum>3</trackNum>
+ <duration>287915</duration>
+ </track>
+ </trackList>
+</playlist>
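
Note the third track: it lists two <location> elements, a relative path and an absolute mirror, so a parser should collect all locations per track rather than only the first. A sketch:

    import xml.etree.ElementTree as ET

    XSPF_NS = '{http://xspf.org/ns/0/}'

    def track_locations(xspf_path):
        # One list of location strings per <track>; the third track
        # above yields ['track3.mp3', 'https://example.com/track3.mp3'].
        root = ET.parse(xspf_path).getroot()
        return [[loc.text for loc in track.findall(XSPF_NS + 'location')]
                for track in root.iter(XSPF_NS + 'track')]
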
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..f98aec6
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,13 @@
+[tox]
+envlist = py26,py27,py33,py34,py35
+[testenv]
+deps =
+ nose
+ coverage
+# We need a valid $HOME for test_compat_expanduser
+passenv = HOME
+defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
+ --exclude test_subtitles.py --exclude test_write_annotations.py
+ --exclude test_youtube_lists.py --exclude test_socks.py
+commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=hypervideo_dl --cover-html
+ # test.test_download:TestDownload.test_NowVideo
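
With this configuration a bare tox run executes nosetests under every listed interpreter with the default exclusions; positional arguments replace them, e.g.:

    tox -e py27                       # default test selection, Python 2.7 only
    tox -e py35 -- test.test_utils    # run a single module instead
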